diff options
author | Tom Stellard <[email protected]> | 2013-01-04 15:38:37 +0000 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2013-01-04 21:05:09 +0000 |
commit | aed37cbee8efb59b2f1a6bc69adcbaecd9e4fa13 (patch) | |
tree | 5748d373dc01011b860049208135c3d942882e29 | |
parent | 05c143cc049a87c515ecdc5695e5912da60cf5cb (diff) |
radeon/llvm: Remove backend code from Mesa
This code now lives in an external tree.
For the next Mesa release fetch the code from the master branch
of this LLVM repo:
http://cgit.freedesktop.org/~tstellar/llvm/
For all subsequent Mesa releases, fetch the code from the official LLVM
project:
www.llvm.org
99 files changed, 0 insertions, 19168 deletions
diff --git a/src/gallium/drivers/radeon/.gitignore b/src/gallium/drivers/radeon/.gitignore deleted file mode 100644 index b723d73ff40..00000000000 --- a/src/gallium/drivers/radeon/.gitignore +++ /dev/null @@ -1,18 +0,0 @@ -AMDGPUInstrEnums.h.include -AMDGPUInstrEnums.include -AMDGPUInstrEnums.td -AMDILGenAsmWriter.inc -AMDILGenCallingConv.inc -AMDILGenCodeEmitter.inc -AMDILGenDAGISel.inc -AMDILGenEDInfo.inc -AMDILGenInstrInfo.inc -AMDILGenIntrinsics.inc -AMDILGenRegisterInfo.inc -AMDILGenSubtargetInfo.inc -R600HwRegInfo.include -R600Intrinsics.td -R600RegisterInfo.td -SIRegisterGetHWRegNum.inc -SIRegisterInfo.td -loader diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h deleted file mode 100644 index f484caa9a3e..00000000000 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ /dev/null @@ -1,46 +0,0 @@ -//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPU_H -#define AMDGPU_H - -#include "AMDGPUTargetMachine.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class FunctionPass; -class AMDGPUTargetMachine; - -// R600 Passes -FunctionPass* createR600KernelParametersPass(const TargetData* TD); -FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); - -// SI Passes -FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); -FunctionPass *createSILowerFlowControlPass(TargetMachine &tm); -FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); -FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); - -// Passes common to R600 and SI -FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); - -} // End namespace llvm - -namespace ShaderType { - enum Type { - PIXEL = 0, - VERTEX = 1, - GEOMETRY = 2, - COMPUTE = 3 - }; -} - -#endif // AMDGPU_H diff --git a/src/gallium/drivers/radeon/AMDGPU.td b/src/gallium/drivers/radeon/AMDGPU.td deleted file mode 100644 index 5086f63d79c..00000000000 --- a/src/gallium/drivers/radeon/AMDGPU.td +++ /dev/null @@ -1,38 +0,0 @@ -//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// - -// Include AMDIL TD files -include "AMDILBase.td" - - -def AMDGPUInstrInfo : InstrInfo {} - -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing -//===----------------------------------------------------------------------===// -def AMDGPUAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def AMDGPU : Target { - // Pull in Instruction Info: - let InstructionSet = AMDGPUInstrInfo; - let AssemblyWriters = [AMDGPUAsmWriter]; -} - -// Include AMDGPU TD files -include "R600Schedule.td" -include "SISchedule.td" -include "Processors.td" -include "AMDGPUInstrInfo.td" -include "AMDGPUIntrinsics.td" -include "AMDGPURegisterInfo.td" -include "AMDGPUInstructions.td" diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp deleted file mode 100644 index 392791cd49d..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The AMDGPUAsmPrinter is used to print both assembly string and also binary -// code. When passed an MCAsmStreamer it prints assembly and when passed -// an MCObjectStreamer it outputs binary code. -// -//===----------------------------------------------------------------------===// -// - - -#include "AMDGPUAsmPrinter.h" -#include "AMDGPU.h" -#include "SIMachineFunctionInfo.h" -#include "SIRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - - -static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, - MCStreamer &Streamer) { - return new AMDGPUAsmPrinter(tm, Streamer); -} - -extern "C" void LLVMInitializeAMDGPUAsmPrinter() { - TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); -} - -/// runOnMachineFunction - We need to override this function so we can avoid -/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle. -bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); - if (STM.dumpCode()) { - MF.dump(); - } - SetupMachineFunction(MF); - if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { - EmitProgramInfo(MF); - } - OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); - EmitFunctionBody(); - return false; -} - -void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { - unsigned MaxSGPR = 0; - unsigned MaxVGPR = 0; - bool VCCUsed = false; - const SIRegisterInfo * RI = - static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - MachineInstr &MI = *I; - - unsigned numOperands = MI.getNumOperands(); - for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { - MachineOperand & MO = MI.getOperand(op_idx); - unsigned maxUsed; - unsigned width = 0; - bool isSGPR = false; - unsigned reg; - unsigned hwReg; - if (!MO.isReg()) { - continue; - } - reg = MO.getReg(); - if (reg == AMDGPU::VCC) { - VCCUsed = true; - continue; - } - switch (reg) { - default: break; - case AMDGPU::EXEC: - case AMDGPU::SI_LITERAL_CONSTANT: - case AMDGPU::SREG_LIT_0: - case AMDGPU::M0: - continue; - } - - if (AMDGPU::SReg_32RegClass.contains(reg)) { - isSGPR = true; - width = 1; - } else if (AMDGPU::VReg_32RegClass.contains(reg)) { - isSGPR = false; - width = 1; - } else if (AMDGPU::SReg_64RegClass.contains(reg)) { - isSGPR = true; - width = 2; - } else if (AMDGPU::VReg_64RegClass.contains(reg)) { - isSGPR = false; - width = 2; - } else if (AMDGPU::SReg_128RegClass.contains(reg)) { - isSGPR = true; - width = 4; - } else if (AMDGPU::VReg_128RegClass.contains(reg)) { - isSGPR = false; - width = 4; - } else if (AMDGPU::SReg_256RegClass.contains(reg)) { - isSGPR = true; - width = 8; - } else { - assert(!"Unknown register class"); - } - hwReg = RI->getHWRegNum(reg); - maxUsed = hwReg + width - 1; - if (isSGPR) { - MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; - } else { - MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; - } - } - } - } - if (VCCUsed) { - MaxSGPR += 2; - } - SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); - OutStreamer.EmitIntValue(MaxSGPR + 1, 4); - OutStreamer.EmitIntValue(MaxVGPR + 1, 4); - OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4); -} diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h deleted file mode 100644 index b35d2e9b2ca..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h +++ /dev/null @@ -1,43 +0,0 @@ -//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// AMDGPU Assembly printer class. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPU_ASMPRINTER_H -#define AMDGPU_ASMPRINTER_H - -#include "llvm/CodeGen/AsmPrinter.h" - -namespace llvm { - -class AMDGPUAsmPrinter : public AsmPrinter { - -public: - explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "AMDGPU Assembly Printer"; - } - - /// EmitProgramInfo - Emit register usage information so that the GPU driver - /// can correctly setup the GPU state. - void EmitProgramInfo(MachineFunction &MF); - - /// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp - virtual void EmitInstruction(const MachineInstr *MI); -}; - -} // End anonymous llvm - -#endif //AMDGPU_ASMPRINTER_H diff --git a/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h b/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h deleted file mode 100644 index f1daec19d54..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// CodeEmitter interface for R600 and SI codegen. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUCODEEMITTER_H -#define AMDGPUCODEEMITTER_H - -namespace llvm { - - class AMDGPUCodeEmitter { - public: - uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - virtual uint64_t getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { return 0; } - virtual unsigned GPR4AlignEncode(const MachineInstr &MI, - unsigned OpNo) const { - return 0; - } - virtual unsigned GPR2AlignEncode(const MachineInstr &MI, - unsigned OpNo) const { - return 0; - } - virtual uint64_t VOPPostEncode(const MachineInstr &MI, - uint64_t Value) const { - return Value; - } - virtual uint64_t i32LiteralEncode(const MachineInstr &MI, - unsigned OpNo) const { - return 0; - } - virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) - const { - return 0; - } - }; - -} // End namespace llvm - -#endif // AMDGPUCODEEMITTER_H diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp deleted file mode 100644 index fbca0a7b832..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass lowers AMDIL machine instructions to the appropriate hardware -// instructions. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -using namespace llvm; - -namespace { - -class AMDGPUConvertToISAPass : public MachineFunctionPass { - -private: - static char ID; - TargetMachine &TM; - -public: - AMDGPUConvertToISAPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const {return "AMDGPU Convert to ISA";} - -}; - -} // End anonymous namespace - -char AMDGPUConvertToISAPass::ID = 0; - -FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) { - return new AMDGPUConvertToISAPass(tm); -} - -bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) -{ - const AMDGPUInstrInfo * TII = - static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo()); - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - MachineInstr &MI = *I; - TII->convertToISA(MI, MF, MBB.findDebugLoc(I)); - } - } - return false; -} diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp deleted file mode 100644 index d37df6b986a..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ /dev/null @@ -1,351 +0,0 @@ -//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the parent TargetLowering class for hardware code gen targets. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUISelLowering.h" -#include "AMDILIntrinsicInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" - -using namespace llvm; - -AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : - TargetLowering(TM, new TargetLoweringObjectFileELF()) -{ - - // Initialize target lowering borrowed from AMDIL - InitAMDILLowering(); - - // We need to custom lower some of the intrinsics - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - - // Library functions. These default to Expand, but we have instructions - // for them. - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FEXP2, MVT::f32, Legal); - setOperationAction(ISD::FPOW, MVT::f32, Legal); - setOperationAction(ISD::FLOG2, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Custom); - setOperationAction(ISD::UREM, MVT::i32, Expand); -} - -//===---------------------------------------------------------------------===// -// TargetLowering Callbacks -//===---------------------------------------------------------------------===// - -SDValue AMDGPUTargetLowering::LowerFormalArguments( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const -{ - for (unsigned i = 0, e = Ins.size(); i < e; ++i) { - InVals.push_back(SDValue()); - } - return Chain; -} - -SDValue AMDGPUTargetLowering::LowerReturn( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const -{ - return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); -} - -//===---------------------------------------------------------------------===// -// Target specific lowering -//===---------------------------------------------------------------------===// - -SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) - const -{ - switch (Op.getOpcode()) { - default: - Op.getNode()->dump(); - assert(0 && "Custom lowering code for this" - "instruction is not implemented yet!"); - break; - // AMDIL DAG lowering - case ISD::SDIV: return LowerSDIV(Op, DAG); - case ISD::SREM: return LowerSREM(Op, DAG); - case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - // AMDGPU DAG lowering - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); - } - return Op; -} - -SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const -{ - unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - - switch (IntrinsicID) { - default: return Op; - case AMDGPUIntrinsic::AMDIL_abs: - return LowerIntrinsicIABS(Op, DAG); - case AMDGPUIntrinsic::AMDIL_exp: - return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); - case AMDGPUIntrinsic::AMDGPU_lrp: - return LowerIntrinsicLRP(Op, DAG); - case AMDGPUIntrinsic::AMDIL_fraction: - return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); - case AMDGPUIntrinsic::AMDIL_mad: - return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3)); - case AMDGPUIntrinsic::AMDIL_max: - return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_imax: - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_umax: - return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); - case AMDGPUIntrinsic::AMDIL_min: - return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_imin: - return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_umin: - return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); - case AMDGPUIntrinsic::AMDIL_round_nearest: - return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); - } -} - -///IABS(a) = SMAX(sub(0, a), a) -SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, - SelectionDAG &DAG) const -{ - - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), - Op.getOperand(1)); - - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); -} - -/// Linear Interpolation -/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) -SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, - SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, - DAG.getConstantFP(1.0f, MVT::f32), - Op.getOperand(1)); - SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, - Op.getOperand(3)); - return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), - Op.getOperand(2), - OneSubAC); -} - - - -SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, - SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - - SDValue Num = Op.getOperand(0); - SDValue Den = Op.getOperand(1); - - SmallVector<SDValue, 8> Results; - - // RCP = URECIP(Den) = 2^32 / Den + e - // e is rounding error. - SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); - - // RCP_LO = umulo(RCP, Den) */ - SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); - - // RCP_HI = mulhu (RCP, Den) */ - SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); - - // NEG_RCP_LO = -RCP_LO - SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), - RCP_LO); - - // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) - SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), - NEG_RCP_LO, RCP_LO, - ISD::SETEQ); - // Calculate the rounding error from the URECIP instruction - // E = mulhu(ABS_RCP_LO, RCP) - SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); - - // RCP_A_E = RCP + E - SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); - - // RCP_S_E = RCP - E - SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); - - // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) - SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), - RCP_A_E, RCP_S_E, - ISD::SETEQ); - // Quotient = mulhu(Tmp0, Num) - SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); - - // Num_S_Remainder = Quotient * Den - SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); - - // Remainder = Num - Num_S_Remainder - SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); - - // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) - SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, - DAG.getConstant(-1, VT), - DAG.getConstant(0, VT), - ISD::SETGE); - // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) - SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, - DAG.getConstant(0, VT), - DAG.getConstant(-1, VT), - DAG.getConstant(0, VT), - ISD::SETGE); - // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero - SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, - Remainder_GE_Zero); - - // Calculate Division result: - - // Quotient_A_One = Quotient + 1 - SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, - DAG.getConstant(1, VT)); - - // Quotient_S_One = Quotient - 1 - SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, - DAG.getConstant(1, VT)); - - // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) - SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), - Quotient, Quotient_A_One, ISD::SETEQ); - - // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) - Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), - Quotient_S_One, Div, ISD::SETEQ); - - // Calculate Rem result: - - // Remainder_S_Den = Remainder - Den - SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); - - // Remainder_A_Den = Remainder + Den - SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); - - // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) - SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), - Remainder, Remainder_S_Den, ISD::SETEQ); - - // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) - Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), - Remainder_A_Den, Rem, ISD::SETEQ); - - DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div); - DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem); - - return Op; -} - -//===----------------------------------------------------------------------===// -// Helper functions -//===----------------------------------------------------------------------===// - -bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const -{ - if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { - return CFP->isExactlyValue(1.0); - } - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - return C->isAllOnesValue(); - } - return false; -} - -bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const -{ - if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { - return CFP->getValueAPF().isZero(); - } - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - return C->isNullValue(); - } - return false; -} - -SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned VirtualRegister; - if (!MRI.isLiveIn(Reg)) { - VirtualRegister = MRI.createVirtualRegister(RC); - MRI.addLiveIn(Reg, VirtualRegister); - } else { - VirtualRegister = MRI.getLiveInVirtReg(Reg); - } - return DAG.getRegister(VirtualRegister, VT); -} - -#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; - -const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const -{ - switch (Opcode) { - default: return 0; - // AMDIL DAG nodes - NODE_NAME_CASE(MAD); - NODE_NAME_CASE(CALL); - NODE_NAME_CASE(UMUL); - NODE_NAME_CASE(DIV_INF); - NODE_NAME_CASE(RET_FLAG); - NODE_NAME_CASE(BRANCH_COND); - - // AMDGPU DAG nodes - NODE_NAME_CASE(FRACT) - NODE_NAME_CASE(FMAX) - NODE_NAME_CASE(SMAX) - NODE_NAME_CASE(UMAX) - NODE_NAME_CASE(FMIN) - NODE_NAME_CASE(SMIN) - NODE_NAME_CASE(UMIN) - NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(INTERP) - NODE_NAME_CASE(INTERP_P0) - } -} diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h deleted file mode 100644 index 2d8ed82c117..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ /dev/null @@ -1,142 +0,0 @@ -//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the interface defintiion of the TargetLowering class -// that is common to all AMD GPUs. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUISELLOWERING_H -#define AMDGPUISELLOWERING_H - -#include "llvm/Target/TargetLowering.h" - -namespace llvm { - -class MachineRegisterInfo; - -class AMDGPUTargetLowering : public TargetLowering -{ -private: - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; - -protected: - - /// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list - /// of the DAG's MachineFunction. This returns a Register SDNode representing - /// Reg. - SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const; - - bool isHWTrueValue(SDValue Op) const; - bool isHWFalseValue(SDValue Op) const; - -public: - AMDGPUTargetLowering(TargetMachine &TM); - - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - - virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const; - - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; - virtual const char* getTargetNodeName(unsigned Opcode) const; - -// Functions defined in AMDILISelLowering.cpp -public: - - /// computeMaskedBitsForTargetNode - Determine which of the bits specified - /// in Mask are known to be either zero or one and return them in the - /// KnownZero/KnownOne bitsets. - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; - - virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, unsigned Intrinsic) const; - - /// isFPImmLegal - We want to mark f32/f64 floating point values as legal. - bool isFPImmLegal(const APFloat &Imm, EVT VT) const; - - /// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants. - bool ShouldShrinkFPConstant(EVT VT) const; - -private: - void InitAMDILLowering(); - SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; - EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; -}; - -namespace AMDGPUISD -{ - -enum -{ - // AMDIL ISD Opcodes - FIRST_NUMBER = ISD::BUILTIN_OP_END, - MAD, // 32bit Fused Multiply Add instruction - CALL, // Function call based on a single integer - UMUL, // 32bit unsigned multiplication - DIV_INF, // Divide with infinity returned on zero divisor - RET_FLAG, - BRANCH_COND, - // End AMDIL ISD Opcodes - BITALIGN, - FRACT, - FMAX, - SMAX, - UMAX, - FMIN, - SMIN, - UMIN, - URECIP, - INTERP, - INTERP_P0, - LAST_AMDGPU_ISD_NUMBER -}; - - -} // End namespace AMDGPUISD - -namespace SIISD { - -enum { - SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, - VCC_AND, - VCC_BITCAST -}; - -} // End namespace SIISD - -} // End namespace llvm - -#endif // AMDGPUISELLOWERING_H diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp deleted file mode 100644 index 9aae09a4a15..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp +++ /dev/null @@ -1,258 +0,0 @@ -//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the TargetInstrInfo class that is -// common to all AMD GPUs. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUInstrInfo.h" -#include "AMDGPURegisterInfo.h" -#include "AMDGPUTargetMachine.h" -#include "AMDIL.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -#define GET_INSTRINFO_CTOR -#include "AMDGPUGenInstrInfo.inc" - -using namespace llvm; - -AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm) - : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { } - -const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const { - return RI; -} - -bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SubIdx) const { -// TODO: Implement this function - return false; -} - -unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { -// TODO: Implement this function - return 0; -} - -unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const { -// TODO: Implement this function - return 0; -} - -bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { -// TODO: Implement this function - return false; -} -unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { -// TODO: Implement this function - return 0; -} -unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const { -// TODO: Implement this function - return 0; -} -bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { -// TODO: Implement this function - return false; -} - -MachineInstr * -AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const { -// TODO: Implement this function - return NULL; -} -bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, - MachineBasicBlock &MBB) const { - while (iter != MBB.end()) { - switch (iter->getOpcode()) { - default: - break; - ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); - case AMDGPU::BRANCH: - return true; - }; - ++iter; - } - return false; -} - -MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { - MachineBasicBlock::iterator tmp = MBB->end(); - if (!MBB->size()) { - return MBB->end(); - } - while (--tmp) { - if (tmp->getOpcode() == AMDGPU::ENDLOOP - || tmp->getOpcode() == AMDGPU::ENDIF - || tmp->getOpcode() == AMDGPU::ELSE) { - if (tmp == MBB->begin()) { - return tmp; - } else { - continue; - } - } else { - return ++tmp; - } - } - return MBB->end(); -} - -void -AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - assert(!"Not Implemented"); -} - -void -AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - assert(!"Not Implemented"); -} - -MachineInstr * -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { -// TODO: Implement this function - return 0; -} -MachineInstr* -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr *LoadMI) const { - // TODO: Implement this function - return 0; -} -bool -AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const -{ - // TODO: Implement this function - return false; -} -bool -AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, - unsigned Reg, bool UnfoldLoad, - bool UnfoldStore, - SmallVectorImpl<MachineInstr*> &NewMIs) const { - // TODO: Implement this function - return false; -} - -bool -AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, - SmallVectorImpl<SDNode*> &NewNodes) const { - // TODO: Implement this function - return false; -} - -unsigned -AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore, - unsigned *LoadRegIndex) const { - // TODO: Implement this function - return 0; -} - -bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, - int64_t Offset1, int64_t Offset2, - unsigned NumLoads) const { - assert(Offset2 > Offset1 - && "Second offset should be larger than first offset!"); - // If we have less than 16 loads in a row, and the offsets are within 16, - // then schedule together. - // TODO: Make the loads schedule near if it fits in a cacheline - return (NumLoads < 16 && (Offset2 - Offset1) < 16); -} - -bool -AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) - const { - // TODO: Implement this function - return true; -} -void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { - // TODO: Implement this function -} - -bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const { - // TODO: Implement this function - return false; -} -bool -AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) - const { - // TODO: Implement this function - return false; -} - -bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { - // TODO: Implement this function - return false; -} - -bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const { - // TODO: Implement this function - return MI->getDesc().isPredicable(); -} - -bool -AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { - // TODO: Implement this function - return true; -} - -void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, - DebugLoc DL) const -{ - MachineRegisterInfo &MRI = MF.getRegInfo(); - const AMDGPURegisterInfo & RI = getRegisterInfo(); - - for (unsigned i = 0; i < MI.getNumOperands(); i++) { - MachineOperand &MO = MI.getOperand(i); - // Convert dst regclass to one that is supported by the ISA - if (MO.isReg() && MO.isDef()) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg()); - const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass); - - assert(newRegClass); - - MRI.setRegClass(MO.getReg(), newRegClass); - } - } - } -} diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h deleted file mode 100644 index a3080767883..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ /dev/null @@ -1,148 +0,0 @@ -//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the definition of a TargetInstrInfo class that is common -// to all AMD GPUs. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUINSTRUCTIONINFO_H_ -#define AMDGPUINSTRUCTIONINFO_H_ - -#include "AMDGPURegisterInfo.h" -#include "AMDGPUInstrInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#include <map> - -#define GET_INSTRINFO_HEADER -#define GET_INSTRINFO_ENUM -#include "AMDGPUGenInstrInfo.inc" - -#define OPCODE_IS_ZERO_INT 0x00000042 -#define OPCODE_IS_NOT_ZERO_INT 0x00000045 -#define OPCODE_IS_ZERO 0x00000020 -#define OPCODE_IS_NOT_ZERO 0x00000023 - -namespace llvm { - -class AMDGPUTargetMachine; -class MachineFunction; -class MachineInstr; -class MachineInstrBuilder; - -class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { -private: - const AMDGPURegisterInfo RI; - TargetMachine &TM; - bool getNextBranchInstr(MachineBasicBlock::iterator &iter, - MachineBasicBlock &MBB) const; -public: - explicit AMDGPUInstrInfo(TargetMachine &tm); - - virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; - - bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, - unsigned &DstReg, unsigned &SubIdx) const; - - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const; - bool hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const; - bool hasStoreFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - - MachineInstr * - convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const; - - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const = 0; - - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - -protected: - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr *LoadMI) const; -public: - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, - unsigned Reg, bool UnfoldLoad, bool UnfoldStore, - SmallVectorImpl<MachineInstr *> &NewMIs) const; - bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, - SmallVectorImpl<SDNode *> &NewNodes) const; - unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore, - unsigned *LoadRegIndex = 0) const; - bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, - int64_t Offset1, int64_t Offset2, - unsigned NumLoads) const; - - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; - bool isPredicated(const MachineInstr *MI) const; - bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const; - bool DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const; - bool isPredicable(MachineInstr *MI) const; - bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - - // Helper functions that check the opcode for status information - bool isLoadInst(llvm::MachineInstr *MI) const; - bool isExtLoadInst(llvm::MachineInstr *MI) const; - bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; - bool isSExtLoadInst(llvm::MachineInstr *MI) const; - bool isZExtLoadInst(llvm::MachineInstr *MI) const; - bool isAExtLoadInst(llvm::MachineInstr *MI) const; - bool isStoreInst(llvm::MachineInstr *MI) const; - bool isTruncStoreInst(llvm::MachineInstr *MI) const; - - virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg, - int64_t Imm) const = 0; - virtual unsigned getIEQOpcode() const = 0; - virtual bool isMov(unsigned opcode) const = 0; - - /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA - /// MachineInstr - virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, - DebugLoc DL) const; - -}; - -} // End llvm namespace - -#endif // AMDGPUINSTRINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td b/src/gallium/drivers/radeon/AMDGPUInstrInfo.td deleted file mode 100644 index 23ca35aadc2..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td +++ /dev/null @@ -1,71 +0,0 @@ -//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains DAG node defintions for the AMDGPU target. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// AMDGPU DAG Profiles -//===----------------------------------------------------------------------===// - -def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> -]>; - -//===----------------------------------------------------------------------===// -// AMDGPU DAG Nodes -// - -// out = ((a << 32) | b) >> c) -// -// Can be used to optimize rtol: -// rotl(a, b) = bitalign(a, a, 32 - b) -def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>; - -// out = a - floor(a) -def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; - -// out = max(a, b) a and b are floats -def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = max(a, b) a and b are signed ints -def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = max(a, b) a and b are unsigned ints -def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = min(a, b) a and b are floats -def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = min(a, b) a snd b are signed ints -def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = min(a, b) a and b are unsigned ints -def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// urecip - This operation is a helper for integer division, it returns the -// result of 1 / a as a fractional unsigned integer. -// out = (2^32 / a) + e -// e is rounding error -def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; - -def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>; diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td deleted file mode 100644 index 9dbdc615e2d..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ /dev/null @@ -1,183 +0,0 @@ -//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains instruction defs that are common to all hw codegen -// targets. -// -//===----------------------------------------------------------------------===// - -class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction { - field bits<16> AMDILOp = 0; - field bits<3> Gen = 0; - - let Namespace = "AMDGPU"; - let OutOperandList = outs; - let InOperandList = ins; - let AsmString = asm; - let Pattern = pattern; - let Itinerary = NullALU; - let TSFlags{42-40} = Gen; - let TSFlags{63-48} = AMDILOp; -} - -class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> - : AMDGPUInst<outs, ins, asm, pattern> { - - field bits<32> Inst = 0xffffffff; - -} - -def COND_EQ : PatLeaf < - (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOEQ: case ISD::SETUEQ: - case ISD::SETEQ: return true;}}}] ->; - -def COND_NE : PatLeaf < - (cond), - [{switch(N->get()){{default: return false; - case ISD::SETONE: case ISD::SETUNE: - case ISD::SETNE: return true;}}}] ->; -def COND_GT : PatLeaf < - (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOGT: case ISD::SETUGT: - case ISD::SETGT: return true;}}}] ->; - -def COND_GE : PatLeaf < - (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOGE: case ISD::SETUGE: - case ISD::SETGE: return true;}}}] ->; - -def COND_LT : PatLeaf < - (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOLT: case ISD::SETULT: - case ISD::SETLT: return true;}}}] ->; - -def COND_LE : PatLeaf < - (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOLE: case ISD::SETULE: - case ISD::SETLE: return true;}}}] ->; - -//===----------------------------------------------------------------------===// -// Load/Store Pattern Fragments -//===----------------------------------------------------------------------===// - -def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ - return isGlobalLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -class Constants { -int TWO_PI = 0x40c90fdb; -int PI = 0x40490fdb; -int TWO_PI_INV = 0x3e22f983; -} -def CONST : Constants; - -def FP_ZERO : PatLeaf < - (fpimm), - [{return N->getValueAPF().isZero();}] ->; - -def FP_ONE : PatLeaf < - (fpimm), - [{return N->isExactlyValue(1.0);}] ->; - -let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { - -class CLAMP <RegisterClass rc> : AMDGPUShaderInst < - (outs rc:$dst), - (ins rc:$src0), - "CLAMP $dst, $src0", - [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] ->; - -class FABS <RegisterClass rc> : AMDGPUShaderInst < - (outs rc:$dst), - (ins rc:$src0), - "FABS $dst, $src0", - [(set rc:$dst, (fabs rc:$src0))] ->; - -class FNEG <RegisterClass rc> : AMDGPUShaderInst < - (outs rc:$dst), - (ins rc:$src0), - "FNEG $dst, $src0", - [(set rc:$dst, (fneg rc:$src0))] ->; - -def SHADER_TYPE : AMDGPUShaderInst < - (outs), - (ins i32imm:$type), - "SHADER_TYPE $type", - [(int_AMDGPU_shader_type imm:$type)] ->; - -} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1 - -/* Generic helper patterns for intrinsics */ -/* -------------------------------------- */ - -class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul, - RegisterClass rc> : Pat < - (fpow rc:$src0, rc:$src1), - (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) ->; - -/* Other helper patterns */ -/* --------------------- */ - -/* Extract element pattern */ -class Extract_Element <ValueType sub_type, ValueType vec_type, - RegisterClass vec_class, int sub_idx, - SubRegIndex sub_reg>: Pat< - (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), - (EXTRACT_SUBREG vec_class:$src, sub_reg) ->; - -/* Insert element pattern */ -class Insert_Element <ValueType elem_type, ValueType vec_type, - RegisterClass elem_class, RegisterClass vec_class, - int sub_idx, SubRegIndex sub_reg> : Pat < - - (vec_type (vector_insert (vec_type vec_class:$vec), - (elem_type elem_class:$elem), sub_idx)), - (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) ->; - -// Vector Build pattern -class Vector_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), - (elemType elemClass:$z), (elemType elemClass:$w))), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y), - elemClass:$z, sel_z), elemClass:$w, sel_w) ->; - -// bitconvert pattern -class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < - (dt (bitconvert (st rc:$src0))), - (dt rc:$src0) ->; - -include "R600Instructions.td" - -include "SIInstrInfo.td" - diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td deleted file mode 100644 index eaca4cf9856..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td +++ /dev/null @@ -1,63 +0,0 @@ -//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines intrinsics that are used by all hw codegen targets. -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "AMDGPU", isTarget = 1 in { - - def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; - def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; - def int_AMDGPU_kilp : Intrinsic<[], [], []>; - def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - - def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>; -} - -let TargetPrefix = "TGSI", isTarget = 1 in { - - def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>; -} - -include "SIIntrinsics.td" diff --git a/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp b/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp deleted file mode 100644 index f3d80a39c3c..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp +++ /dev/null @@ -1,82 +0,0 @@ -//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower AMDGPU MachineInstrs to their corresponding -// MCInst. -// -//===----------------------------------------------------------------------===// -// - -#include "AMDGPUMCInstLower.h" -#include "AMDGPUAsmPrinter.h" -#include "R600InstrInfo.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Constants.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -AMDGPUMCInstLower::AMDGPUMCInstLower() { } - -void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - - MCOperand MCOp; - switch (MO.getType()) { - default: - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_FPImmediate: { - const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); - assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle && - "Only floating point immediates are supported at the moment."); - MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); - break; - } - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_Register: - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - } - OutMI.addOperand(MCOp); - } -} - -void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { - AMDGPUMCInstLower MCInstLowering; - - // Ignore placeholder instructions: - if (MI->getOpcode() == AMDGPU::MASK_WRITE) { - return; - } - - if (MI->isBundle()) { - const MachineBasicBlock *MBB = MI->getParent(); - MachineBasicBlock::const_instr_iterator I = MI; - ++I; - while (I != MBB->end() && I->isInsideBundle()) { - MCInst MCBundleInst; - const MachineInstr *BundledInst = I; - MCInstLowering.lower(BundledInst, MCBundleInst); - OutStreamer.EmitInstruction(MCBundleInst); - ++I; - } - } else { - MCInst TmpInst; - MCInstLowering.lower(MI, TmpInst); - OutStreamer.EmitInstruction(TmpInst); - } -} diff --git a/src/gallium/drivers/radeon/AMDGPUMCInstLower.h b/src/gallium/drivers/radeon/AMDGPUMCInstLower.h deleted file mode 100644 index 3f68ff0874e..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUMCInstLower.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPU_MCINSTLOWER_H -#define AMDGPU_MCINSTLOWER_H - -namespace llvm { - -class MCInst; -class MachineInstr; - -class AMDGPUMCInstLower { - -public: - AMDGPUMCInstLower(); - - /// lower - Lower a MachineInstr to an MCInst - void lower(const MachineInstr *MI, MCInst &OutMI) const; - -}; - -} // End namespace llvm - -#endif //AMDGPU_MCINSTLOWER_H diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp deleted file mode 100644 index 69bda631738..00000000000 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Parent TargetRegisterInfo class common to all hw codegen targets. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPURegisterInfo.h" -#include "AMDGPUTargetMachine.h" - -using namespace llvm; - -AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm, - const TargetInstrInfo &tii) -: AMDGPUGenRegisterInfo(0), - TM(tm), - TII(tii) - { } - -//===----------------------------------------------------------------------===// -// Function handling callbacks - Functions are a seldom used feature of GPUS, so -// they are not supported at this time. -//===----------------------------------------------------------------------===// - -const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; - -const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) - const { - return &CalleeSavedReg; -} - -void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, - RegScavenger *RS) const { - assert(!"Subroutines not supported yet"); -} - -unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { - assert(!"Subroutines not supported yet"); - return 0; -} - -#define GET_REGINFO_TARGET_DESC -#include "AMDGPUGenRegisterInfo.inc" diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h deleted file mode 100644 index 326610d333e..00000000000 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the TargetRegisterInfo interface that is implemented -// by all hw codegen targets. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUREGISTERINFO_H_ -#define AMDGPUREGISTERINFO_H_ - -#include "llvm/ADT/BitVector.h" -#include "llvm/Target/TargetRegisterInfo.h" - -#define GET_REGINFO_HEADER -#define GET_REGINFO_ENUM -#include "AMDGPUGenRegisterInfo.inc" - -namespace llvm { - -class AMDGPUTargetMachine; -class TargetInstrInfo; - -struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo -{ - TargetMachine &TM; - const TargetInstrInfo &TII; - static const uint16_t CalleeSavedReg; - - AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii); - - virtual BitVector getReservedRegs(const MachineFunction &MF) const { - assert(!"Unimplemented"); return BitVector(); - } - - /// getISARegClass - rc is an AMDIL reg class. This function returns the - /// ISA reg class that is equivalent to the given AMDIL reg class. - virtual const TargetRegisterClass * getISARegClass( - const TargetRegisterClass * rc) const { - assert(!"Unimplemented"); return NULL; - } - - virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { - assert(!"Unimplemented"); return NULL; - } - - const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, - RegScavenger *RS) const; - unsigned getFrameRegister(const MachineFunction &MF) const; - -}; - -} // End namespace llvm - -#endif // AMDIDSAREGISTERINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td deleted file mode 100644 index 8181e023aa3..00000000000 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td +++ /dev/null @@ -1,22 +0,0 @@ -//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Tablegen register definitions common to all hw codegen targets. -// -//===----------------------------------------------------------------------===// - -let Namespace = "AMDGPU" in { - def sel_x : SubRegIndex; - def sel_y : SubRegIndex; - def sel_z : SubRegIndex; - def sel_w : SubRegIndex; -} - -include "R600RegisterInfo.td" -include "SIRegisterInfo.td" diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp b/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp deleted file mode 100644 index d4a70b6c62f..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AMDGPU specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUSubtarget.h" - -using namespace llvm; - -#define GET_SUBTARGETINFO_ENUM -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "AMDGPUGenSubtargetInfo.inc" - -AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : - AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) { - InstrItins = getInstrItineraryForCPU(CPU); - - memset(CapsOverride, 0, sizeof(*CapsOverride) - * AMDGPUDeviceInfo::MaxNumberCapabilities); - // Default card - StringRef GPU = CPU; - mIs64bit = false; - mDefaultSize[0] = 64; - mDefaultSize[1] = 1; - mDefaultSize[2] = 1; - ParseSubtargetFeatures(GPU, FS); - mDevName = GPU; - mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit); -} - -AMDGPUSubtarget::~AMDGPUSubtarget() -{ - delete mDevice; -} - -bool -AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const -{ - assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities && - "Caps index is out of bounds!"); - return CapsOverride[caps]; -} -bool -AMDGPUSubtarget::is64bit() const -{ - return mIs64bit; -} -bool -AMDGPUSubtarget::isTargetELF() const -{ - return false; -} -size_t -AMDGPUSubtarget::getDefaultSize(uint32_t dim) const -{ - if (dim > 3) { - return 1; - } else { - return mDefaultSize[dim]; - } -} - -std::string -AMDGPUSubtarget::getDataLayout() const -{ - if (!mDevice) { - return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" - "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" - "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"); - } - return mDevice->getDataLayout(); -} - -std::string -AMDGPUSubtarget::getDeviceName() const -{ - return mDevName; -} -const AMDGPUDevice * -AMDGPUSubtarget::device() const -{ - return mDevice; -} diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.h b/src/gallium/drivers/radeon/AMDGPUSubtarget.h deleted file mode 100644 index 30bda83a205..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUSubtarget.h +++ /dev/null @@ -1,66 +0,0 @@ -//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file declares the AMDGPU specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#ifndef _AMDGPUSUBTARGET_H_ -#define _AMDGPUSUBTARGET_H_ -#include "AMDILDevice.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -#define GET_SUBTARGETINFO_HEADER -#include "AMDGPUGenSubtargetInfo.inc" - -#define MAX_CB_SIZE (1 << 16) - -namespace llvm { - -class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo -{ -private: - bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities]; - const AMDGPUDevice *mDevice; - size_t mDefaultSize[3]; - size_t mMinimumSize[3]; - std::string mDevName; - bool mIs64bit; - bool mIs32on64bit; - bool mDumpCode; - bool mR600ALUInst; - - InstrItineraryData InstrItins; - -public: - AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS); - virtual ~AMDGPUSubtarget(); - - const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS); - - bool isOverride(AMDGPUDeviceInfo::Caps) const; - bool is64bit() const; - - // Helper functions to simplify if statements - bool isTargetELF() const; - const AMDGPUDevice* device() const; - std::string getDataLayout() const; - std::string getDeviceName() const; - virtual size_t getDefaultSize(uint32_t dim) const; - bool dumpCode() const { return mDumpCode; } - bool r600ALUEncoding() const { return mR600ALUInst; } - -}; - -} // End namespace llvm - -#endif // AMDGPUSUBTARGET_H_ diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp deleted file mode 100644 index bfe9d81303b..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The AMDGPU target machine contains all of the hardware specific information -// needed to emit code for R600 and SI GPUs. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUTargetMachine.h" -#include "AMDGPU.h" -#include "R600ISelLowering.h" -#include "R600InstrInfo.h" -#include "SIISelLowering.h" -#include "SIInstrInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/PassManager.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_os_ostream.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Scalar.h" -#include <llvm/CodeGen/Passes.h> - -using namespace llvm; - -extern "C" void LLVMInitializeAMDGPUTarget() { - // Register the target - RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget); -} - -AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OptLevel -) -: - LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), - Subtarget(TT, CPU, FS), - DataLayout(Subtarget.getDataLayout()), - FrameLowering(TargetFrameLowering::StackGrowsUp, - Subtarget.device()->getStackAlignment(), 0), - IntrinsicInfo(this), - InstrItins(&Subtarget.getInstrItineraryData()), - mDump(false) - -{ - // TLInfo uses InstrInfo so it must be initialized after. - if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { - InstrInfo = new R600InstrInfo(*this); - TLInfo = new R600TargetLowering(*this); - } else { - InstrInfo = new SIInstrInfo(*this); - TLInfo = new SITargetLowering(*this); - } -} - -AMDGPUTargetMachine::~AMDGPUTargetMachine() -{ -} - -namespace { -class AMDGPUPassConfig : public TargetPassConfig { -public: - AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - AMDGPUTargetMachine &getAMDGPUTargetMachine() const { - return getTM<AMDGPUTargetMachine>(); - } - - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); -}; -} // End of anonymous namespace - -TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { - return new AMDGPUPassConfig(this, PM); -} - -bool -AMDGPUPassConfig::addPreISel() -{ - return false; -} - -bool AMDGPUPassConfig::addInstSelector() { - PM->add(createAMDGPUPeepholeOpt(*TM)); - PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine())); - return false; -} - -bool AMDGPUPassConfig::addPreRegAlloc() { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - - if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { - PM->add(createSIAssignInterpRegsPass(*TM)); - } - PM->add(createAMDGPUConvertToISAPass(*TM)); - return false; -} - -bool AMDGPUPassConfig::addPostRegAlloc() { - return false; -} - -bool AMDGPUPassConfig::addPreSched2() { - - addPass(IfConverterID); - return false; -} - -bool AMDGPUPassConfig::addPreEmitPass() { - PM->add(createAMDGPUCFGPreparationPass(*TM)); - PM->add(createAMDGPUCFGStructurizerPass(*TM)); - - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { - PM->add(createR600ExpandSpecialInstrsPass(*TM)); - addPass(FinalizeMachineBundlesID); - } else { - PM->add(createSILowerLiteralConstantsPass(*TM)); - // piglit is unreliable (VM protection faults, GPU lockups) with this pass: - //PM->add(createSILowerFlowControlPass(*TM)); - } - - return false; -} - diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h deleted file mode 100644 index 8b405a882cc..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h +++ /dev/null @@ -1,70 +0,0 @@ -//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The AMDGPU TargetMachine interface definition for hw codgen targets. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPU_TARGET_MACHINE_H -#define AMDGPU_TARGET_MACHINE_H - -#include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" -#include "AMDILFrameLowering.h" -#include "AMDILIntrinsicInfo.h" -#include "R600ISelLowering.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Target/TargetData.h" - -namespace llvm { - -MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT); - -class AMDGPUTargetMachine : public LLVMTargetMachine { - - AMDGPUSubtarget Subtarget; - const TargetData DataLayout; - AMDGPUFrameLowering FrameLowering; - AMDGPUIntrinsicInfo IntrinsicInfo; - const AMDGPUInstrInfo * InstrInfo; - AMDGPUTargetLowering * TLInfo; - const InstrItineraryData* InstrItins; - bool mDump; - -public: - AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS, - StringRef CPU, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); - ~AMDGPUTargetMachine(); - virtual const AMDGPUFrameLowering* getFrameLowering() const { - return &FrameLowering; - } - virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const { - return &IntrinsicInfo; - } - virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;} - virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; } - virtual const AMDGPURegisterInfo *getRegisterInfo() const { - return &InstrInfo->getRegisterInfo(); - } - virtual AMDGPUTargetLowering * getTargetLowering() const { - return TLInfo; - } - virtual const InstrItineraryData* getInstrItineraryData() const { - return InstrItins; - } - virtual const TargetData* getTargetData() const { return &DataLayout; } - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); -}; - -} // End namespace llvm - -#endif // AMDGPU_TARGET_MACHINE_H diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h deleted file mode 100644 index e96b123bb7d..00000000000 --- a/src/gallium/drivers/radeon/AMDIL.h +++ /dev/null @@ -1,106 +0,0 @@ -//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// AMDGPU back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDIL_H_ -#define AMDIL_H_ - -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetMachine.h" - -#define ARENA_SEGMENT_RESERVED_UAVS 12 -#define DEFAULT_ARENA_UAV_ID 8 -#define DEFAULT_RAW_UAV_ID 7 -#define GLOBAL_RETURN_RAW_UAV_ID 11 -#define HW_MAX_NUM_CB 8 -#define MAX_NUM_UNIQUE_UAVS 8 -#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8 -#define OPENCL_MAX_READ_IMAGES 128 -#define OPENCL_MAX_WRITE_IMAGES 8 -#define OPENCL_MAX_SAMPLERS 16 - -// The next two values can never be zero, as zero is the ID that is -// used to assert against. -#define DEFAULT_LDS_ID 1 -#define DEFAULT_GDS_ID 1 -#define DEFAULT_SCRATCH_ID 1 -#define DEFAULT_VEC_SLOTS 8 - -#define OCL_DEVICE_RV710 0x0001 -#define OCL_DEVICE_RV730 0x0002 -#define OCL_DEVICE_RV770 0x0004 -#define OCL_DEVICE_CEDAR 0x0008 -#define OCL_DEVICE_REDWOOD 0x0010 -#define OCL_DEVICE_JUNIPER 0x0020 -#define OCL_DEVICE_CYPRESS 0x0040 -#define OCL_DEVICE_CAICOS 0x0080 -#define OCL_DEVICE_TURKS 0x0100 -#define OCL_DEVICE_BARTS 0x0200 -#define OCL_DEVICE_CAYMAN 0x0400 -#define OCL_DEVICE_ALL 0x3FFF - -/// The number of function ID's that are reserved for -/// internal compiler usage. -const unsigned int RESERVED_FUNCS = 1024; - -namespace llvm { -class AMDGPUInstrPrinter; -class FunctionPass; -class MCAsmInfo; -class raw_ostream; -class Target; -class TargetMachine; - -/// Instruction selection passes. -FunctionPass* - createAMDGPUISelDag(TargetMachine &TM); -FunctionPass* - createAMDGPUPeepholeOpt(TargetMachine &TM); - -/// Pre emit passes. -FunctionPass* - createAMDGPUCFGPreparationPass(TargetMachine &TM); -FunctionPass* - createAMDGPUCFGStructurizerPass(TargetMachine &TM); - -extern Target TheAMDGPUTarget; -} // end namespace llvm; - -/// Include device information enumerations -#include "AMDILDeviceInfo.h" - -namespace llvm { -/// OpenCL uses address spaces to differentiate between -/// various memory regions on the hardware. On the CPU -/// all of the address spaces point to the same memory, -/// however on the GPU, each address space points to -/// a seperate piece of memory that is unique from other -/// memory locations. -namespace AMDGPUAS { -enum AddressSpaces { - PRIVATE_ADDRESS = 0, // Address space for private memory. - GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, // Address space for constant memory. - LOCAL_ADDRESS = 3, // Address space for local memory. - REGION_ADDRESS = 4, // Address space for region memory. - ADDRESS_NONE = 5, // Address space for unknown memory. - PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0) - PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1) - USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI - LAST_ADDRESS = 9 -}; - -} // namespace AMDGPUAS - -} // end namespace llvm -#endif // AMDIL_H_ diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp deleted file mode 100644 index 8561f0b3175..00000000000 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDIL7XXDevice.h" -#include "AMDGPUSubtarget.h" -#include "AMDILDevice.h" - -using namespace llvm; - -AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) -{ - setCaps(); - std::string name = mSTM->getDeviceName(); - if (name == "rv710") { - mDeviceFlag = OCL_DEVICE_RV710; - } else if (name == "rv730") { - mDeviceFlag = OCL_DEVICE_RV730; - } else { - mDeviceFlag = OCL_DEVICE_RV770; - } -} - -AMDGPU7XXDevice::~AMDGPU7XXDevice() -{ -} - -void AMDGPU7XXDevice::setCaps() -{ - mSWBits.set(AMDGPUDeviceInfo::LocalMem); -} - -size_t AMDGPU7XXDevice::getMaxLDSSize() const -{ - if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { - return MAX_LDS_SIZE_700; - } - return 0; -} - -size_t AMDGPU7XXDevice::getWavefrontSize() const -{ - return AMDGPUDevice::HalfWavefrontSize; -} - -uint32_t AMDGPU7XXDevice::getGeneration() const -{ - return AMDGPUDeviceInfo::HD4XXX; -} - -uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const -{ - switch (DeviceID) { - default: - assert(0 && "ID type passed in is unknown!"); - break; - case GLOBAL_ID: - case CONSTANT_ID: - case RAW_UAV_ID: - case ARENA_UAV_ID: - break; - case LDS_ID: - if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { - return DEFAULT_LDS_ID; - } - break; - case SCRATCH_ID: - if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { - return DEFAULT_SCRATCH_ID; - } - break; - case GDS_ID: - assert(0 && "GDS UAV ID is not supported on this chip"); - if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { - return DEFAULT_GDS_ID; - } - break; - }; - - return 0; -} - -uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const -{ - return 1; -} - -AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) -{ - setCaps(); -} - -AMDGPU770Device::~AMDGPU770Device() -{ -} - -void AMDGPU770Device::setCaps() -{ - if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { - mSWBits.set(AMDGPUDeviceInfo::FMA); - mHWBits.set(AMDGPUDeviceInfo::DoubleOps); - } - mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); - mHWBits.reset(AMDGPUDeviceInfo::LongOps); - mSWBits.set(AMDGPUDeviceInfo::LongOps); - mSWBits.set(AMDGPUDeviceInfo::LocalMem); -} - -size_t AMDGPU770Device::getWavefrontSize() const -{ - return AMDGPUDevice::WavefrontSize; -} - -AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) -{ -} - -AMDGPU710Device::~AMDGPU710Device() -{ -} - -size_t AMDGPU710Device::getWavefrontSize() const -{ - return AMDGPUDevice::QuarterWavefrontSize; -} diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h deleted file mode 100644 index e848e2e0f2c..00000000000 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.h +++ /dev/null @@ -1,70 +0,0 @@ -//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface for the subtarget data classes. -// -//===----------------------------------------------------------------------===// -// This file will define the interface that each generation needs to -// implement in order to correctly answer queries on the capabilities of the -// specific hardware. -//===----------------------------------------------------------------------===// -#ifndef _AMDIL7XXDEVICEIMPL_H_ -#define _AMDIL7XXDEVICEIMPL_H_ -#include "AMDILDevice.h" - -namespace llvm { -class AMDGPUSubtarget; - -//===----------------------------------------------------------------------===// -// 7XX generation of devices and their respective sub classes -//===----------------------------------------------------------------------===// - -// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX -// devices are derived from this class. The AMDGPU7XX device will only -// support the minimal features that are required to be considered OpenCL 1.0 -// compliant and nothing more. -class AMDGPU7XXDevice : public AMDGPUDevice { -public: - AMDGPU7XXDevice(AMDGPUSubtarget *ST); - virtual ~AMDGPU7XXDevice(); - virtual size_t getMaxLDSSize() const; - virtual size_t getWavefrontSize() const; - virtual uint32_t getGeneration() const; - virtual uint32_t getResourceID(uint32_t DeviceID) const; - virtual uint32_t getMaxNumUAVs() const; - -protected: - virtual void setCaps(); -}; // AMDGPU7XXDevice - -// The AMDGPU770Device class represents the RV770 chip and it's -// derivative cards. The difference between this device and the base -// class is this device device adds support for double precision -// and has a larger wavefront size. -class AMDGPU770Device : public AMDGPU7XXDevice { -public: - AMDGPU770Device(AMDGPUSubtarget *ST); - virtual ~AMDGPU770Device(); - virtual size_t getWavefrontSize() const; -private: - virtual void setCaps(); -}; // AMDGPU770Device - -// The AMDGPU710Device class derives from the 7XX base class, but this -// class is a smaller derivative, so we need to overload some of the -// functions in order to correctly specify this information. -class AMDGPU710Device : public AMDGPU7XXDevice { -public: - AMDGPU710Device(AMDGPUSubtarget *ST); - virtual ~AMDGPU710Device(); - virtual size_t getWavefrontSize() const; -}; // AMDGPU710Device - -} // namespace llvm -#endif // _AMDILDEVICEIMPL_H_ diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td deleted file mode 100644 index ffe9ce2c532..00000000000 --- a/src/gallium/drivers/radeon/AMDILBase.td +++ /dev/null @@ -1,85 +0,0 @@ -//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -// Dummy Instruction itineraries for pseudo instructions -def ALU_NULL : FuncUnit; -def NullALU : InstrItinClass; - -//===----------------------------------------------------------------------===// -// AMDIL Subtarget features. -//===----------------------------------------------------------------------===// -def FeatureFP64 : SubtargetFeature<"fp64", - "CapsOverride[AMDGPUDeviceInfo::DoubleOps]", - "true", - "Enable 64bit double precision operations">; -def FeatureByteAddress : SubtargetFeature<"byte_addressable_store", - "CapsOverride[AMDGPUDeviceInfo::ByteStores]", - "true", - "Enable byte addressable stores">; -def FeatureBarrierDetect : SubtargetFeature<"barrier_detect", - "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]", - "true", - "Enable duplicate barrier detection(HD5XXX or later).">; -def FeatureImages : SubtargetFeature<"images", - "CapsOverride[AMDGPUDeviceInfo::Images]", - "true", - "Enable image functions">; -def FeatureMultiUAV : SubtargetFeature<"multi_uav", - "CapsOverride[AMDGPUDeviceInfo::MultiUAV]", - "true", - "Generate multiple UAV code(HD5XXX family or later)">; -def FeatureMacroDB : SubtargetFeature<"macrodb", - "CapsOverride[AMDGPUDeviceInfo::MacroDB]", - "true", - "Use internal macrodb, instead of macrodb in driver">; -def FeatureNoAlias : SubtargetFeature<"noalias", - "CapsOverride[AMDGPUDeviceInfo::NoAlias]", - "true", - "assert that all kernel argument pointers are not aliased">; -def FeatureNoInline : SubtargetFeature<"no-inline", - "CapsOverride[AMDGPUDeviceInfo::NoInline]", - "true", - "specify whether to not inline functions">; - -def Feature64BitPtr : SubtargetFeature<"64BitPtr", - "mIs64bit", - "false", - "Specify if 64bit addressing should be used.">; - -def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", - "mIs32on64bit", - "false", - "Specify if 64bit sized pointers with 32bit addressing should be used.">; -def FeatureDebug : SubtargetFeature<"debug", - "CapsOverride[AMDGPUDeviceInfo::Debug]", - "true", - "Debug mode is enabled, so disable hardware accelerated address spaces.">; -def FeatureDumpCode : SubtargetFeature <"DumpCode", - "mDumpCode", - "true", - "Dump MachineInstrs in the CodeEmitter">; - -def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", - "mR600ALUInst", - "false", - "Older version of ALU instructions encoding.">; - - -//===----------------------------------------------------------------------===// -// Register File, Calling Conv, Instruction Descriptions -//===----------------------------------------------------------------------===// - - -include "AMDILRegisterInfo.td" -include "AMDILInstrInfo.td" - diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp deleted file mode 100644 index 20e27ef1132..00000000000 --- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp +++ /dev/null @@ -1,3274 +0,0 @@ -//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// - -#define DEBUGME 0 -#define DEBUG_TYPE "structcfg" - -#include "AMDGPUInstrInfo.h" -#include "AMDIL.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define FirstNonDebugInstr(A) A->begin() -using namespace llvm; - -// TODO: move-begin. - -//===----------------------------------------------------------------------===// -// -// Statistics for CFGStructurizer. -// -//===----------------------------------------------------------------------===// - -STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " - "matched"); -STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " - "matched"); -STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " - "pattern matched"); -STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " - "pattern matched"); -STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " - "matched"); -STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); -STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); - -//===----------------------------------------------------------------------===// -// -// Miscellaneous utility for CFGStructurizer. -// -//===----------------------------------------------------------------------===// -namespace llvmCFGStruct -{ -#define SHOWNEWINSTR(i) \ - if (DEBUGME) errs() << "New instr: " << *i << "\n" - -#define SHOWNEWBLK(b, msg) \ -if (DEBUGME) { \ - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - errs() << "\n"; \ -} - -#define SHOWBLK_DETAIL(b, msg) \ -if (DEBUGME) { \ - if (b) { \ - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - b->print(errs()); \ - errs() << "\n"; \ - } \ -} - -#define INVALIDSCCNUM -1 -#define INVALIDREGNUM 0 - -template<class LoopinfoT> -void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { - for (typename LoopinfoT::iterator iter = LoopInfo.begin(), - iterEnd = LoopInfo.end(); - iter != iterEnd; ++iter) { - (*iter)->print(OS, 0); - } -} - -template<class NodeT> -void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { - size_t sz = Src.size(); - for (size_t i = 0; i < sz/2; ++i) { - NodeT *t = Src[i]; - Src[i] = Src[sz - i - 1]; - Src[sz - i - 1] = t; - } -} - -} //end namespace llvmCFGStruct - - -//===----------------------------------------------------------------------===// -// -// MachinePostDominatorTree -// -//===----------------------------------------------------------------------===// - -namespace llvm { - -/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used -/// to compute the a post-dominator tree. -/// -struct MachinePostDominatorTree : public MachineFunctionPass { - static char ID; // Pass identification, replacement for typeid - DominatorTreeBase<MachineBasicBlock> *DT; - MachinePostDominatorTree() : MachineFunctionPass(ID) - { - DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate - // postdominator - } - - ~MachinePostDominatorTree(); - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - inline const std::vector<MachineBasicBlock *> &getRoots() const { - return DT->getRoots(); - } - - inline MachineDomTreeNode *getRootNode() const { - return DT->getRootNode(); - } - - inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { - return DT->getNode(BB); - } - - inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { - return DT->getNode(BB); - } - - inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { - return DT->dominates(A, B); - } - - inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { - return DT->dominates(A, B); - } - - inline bool - properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { - return DT->properlyDominates(A, B); - } - - inline bool - properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { - return DT->properlyDominates(A, B); - } - - inline MachineBasicBlock * - findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { - return DT->findNearestCommonDominator(A, B); - } - - virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const { - DT->print(OS); - } -}; -} //end of namespace llvm - -char MachinePostDominatorTree::ID = 0; -static RegisterPass<MachinePostDominatorTree> -machinePostDominatorTreePass("machinepostdomtree", - "MachinePostDominator Tree Construction", - true, true); - -//const PassInfo *const llvm::MachinePostDominatorsID -//= &machinePostDominatorTreePass; - -bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { - DT->recalculate(F); - //DEBUG(DT->dump()); - return false; -} - -MachinePostDominatorTree::~MachinePostDominatorTree() { - delete DT; -} - -//===----------------------------------------------------------------------===// -// -// supporting data structure for CFGStructurizer -// -//===----------------------------------------------------------------------===// - -namespace llvmCFGStruct -{ -template<class PassT> -struct CFGStructTraits { -}; - -template <class InstrT> -class BlockInformation { -public: - bool isRetired; - int sccNum; - //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr; - //Instructions defining the corresponding successor. - BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} -}; - -template <class BlockT, class InstrT, class RegiT> -class LandInformation { -public: - BlockT *landBlk; - std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before - //WHILELOOP(thisloop) init before entering - //thisloop. - std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after - //WHILELOOP(thisloop) init after entering - //thisloop. - std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop - //land block, branch cond on this reg. - std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break - //endif" after ENDLOOP(thisloop) break - //outerLoopOf(thisLoop). - std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue - //endif" after ENDLOOP(thisloop) continue on - //outerLoopOf(thisLoop). - LandInformation() : landBlk(NULL) {} -}; - -} //end of namespace llvmCFGStruct - -//===----------------------------------------------------------------------===// -// -// CFGStructurizer -// -//===----------------------------------------------------------------------===// - -namespace llvmCFGStruct -{ -// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. -template<class PassT> -class CFGStructurizer -{ -public: - typedef enum { - Not_SinglePath = 0, - SinglePath_InPath = 1, - SinglePath_NotInPath = 2 - } PathToKind; - -public: - typedef typename PassT::InstructionType InstrT; - typedef typename PassT::FunctionType FuncT; - typedef typename PassT::DominatortreeType DomTreeT; - typedef typename PassT::PostDominatortreeType PostDomTreeT; - typedef typename PassT::DomTreeNodeType DomTreeNodeT; - typedef typename PassT::LoopinfoType LoopInfoT; - - typedef GraphTraits<FuncT *> FuncGTraits; - //typedef FuncGTraits::nodes_iterator BlockIterator; - typedef typename FuncT::iterator BlockIterator; - - typedef typename FuncGTraits::NodeType BlockT; - typedef GraphTraits<BlockT *> BlockGTraits; - typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits; - //typedef BlockGTraits::succ_iterator InstructionIterator; - typedef typename BlockT::iterator InstrIterator; - - typedef CFGStructTraits<PassT> CFGTraits; - typedef BlockInformation<InstrT> BlockInfo; - typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; - - typedef int RegiT; - typedef typename PassT::LoopType LoopT; - typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo; - typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap; - //landing info for loop break - typedef SmallVector<BlockT *, 32> BlockTSmallerVector; - -public: - CFGStructurizer(); - ~CFGStructurizer(); - - /// Perform the CFG structurization - bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); - - /// Perform the CFG preparation - bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); - -private: - void reversePredicateSetter(typename BlockT::iterator); - void orderBlocks(); - void printOrderedBlocks(llvm::raw_ostream &OS); - int patternMatch(BlockT *CurBlock); - int patternMatchGroup(BlockT *CurBlock); - - int serialPatternMatch(BlockT *CurBlock); - int ifPatternMatch(BlockT *CurBlock); - int switchPatternMatch(BlockT *CurBlock); - int loopendPatternMatch(BlockT *CurBlock); - int loopPatternMatch(BlockT *CurBlock); - - int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); - int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); - //int loopWithoutBreak(BlockT *); - - void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, - BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); - void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, - BlockT *ContBlock, LoopT *contLoop); - bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); - int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock); - int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock); - int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock, BlockT **LandBlockPtr); - void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock, BlockT *LandBlock, - bool Detail = false); - PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, - bool AllowSideEntry = true); - BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, - bool AllowSideEntry = true); - int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); - void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); - - void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, - BlockT *TrueBlock, BlockT *FalseBlock, - BlockT *LandBlock); - void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); - void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, - BlockT *ExitLandBlock, RegiT SetReg); - void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, - RegiT SetReg); - BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, - std::set<BlockT*> &ExitBlockSet, - BlockT *ExitLandBlk); - BlockT *addLoopEndbranchBlock(LoopT *LoopRep, - BlockTSmallerVector &ExitingBlocks, - BlockTSmallerVector &ExitBlocks); - BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); - void removeUnconditionalBranch(BlockT *SrcBlock); - void removeRedundantConditionalBranch(BlockT *SrcBlock); - void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks); - - void removeSuccessor(BlockT *SrcBlock); - BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); - BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); - - void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, - InstrIterator InsertPos); - - void recordSccnum(BlockT *SrcBlock, int SCCNum); - int getSCCNum(BlockT *srcBlk); - - void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); - bool isRetiredBlock(BlockT *SrcBlock); - bool isActiveLoophead(BlockT *CurBlock); - bool needMigrateBlock(BlockT *Block); - - BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, - BlockTSmallerVector &exitBlocks, - std::set<BlockT*> &ExitBlockSet); - void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); - BlockT *getLoopLandBlock(LoopT *LoopRep); - LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); - - void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); - void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); - void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); - void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); - void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); - - bool hasBackEdge(BlockT *curBlock); - unsigned getLoopDepth (LoopT *LoopRep); - int countActiveBlock( - typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart, - typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd); - BlockT *findNearestCommonPostDom(std::set<BlockT *>&); - BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); - -private: - DomTreeT *domTree; - PostDomTreeT *postDomTree; - LoopInfoT *loopInfo; - PassT *passRep; - FuncT *funcRep; - - BlockInfoMap blockInfoMap; - LoopLandInfoMap loopLandInfoMap; - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; - const AMDGPURegisterInfo *TRI; - -}; //template class CFGStructurizer - -template<class PassT> CFGStructurizer<PassT>::CFGStructurizer() - : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { -} - -template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() { - for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), - E = blockInfoMap.end(); I != E; ++I) { - delete I->second; - } -} - -template<class PassT> -bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass, - const AMDGPURegisterInfo * tri) { - passRep = &pass; - funcRep = &func; - TRI = tri; - - bool changed = false; - //func.RenumberBlocks(); - - //to do, if not reducible flow graph, make it so ??? - - if (DEBUGME) { - errs() << "AMDGPUCFGStructurizer::prepare\n"; - //func.viewCFG(); - //func.viewCFGOnly(); - //func.dump(); - } - - //FIXME: gcc complains on this. - //domTree = &pass.getAnalysis<DomTreeT>(); - //domTree = CFGTraits::getDominatorTree(pass); - //if (DEBUGME) { - // domTree->print(errs()); - //} - - //FIXME: gcc complains on this. - //domTree = &pass.getAnalysis<DomTreeT>(); - //postDomTree = CFGTraits::getPostDominatorTree(pass); - //if (DEBUGME) { - // postDomTree->print(errs()); - //} - - //FIXME: gcc complains on this. - //loopInfo = &pass.getAnalysis<LoopInfoT>(); - loopInfo = CFGTraits::getLoopInfo(pass); - if (DEBUGME) { - errs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, errs()); - } - - orderBlocks(); - if (DEBUGME) { - errs() << "Ordered blocks:\n"; - printOrderedBlocks(errs()); - } - - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks; - - for (typename LoopInfoT::iterator iter = loopInfo->begin(), - iterEnd = loopInfo->end(); - iter != iterEnd; ++iter) { - LoopT* loopRep = (*iter); - BlockTSmallerVector exitingBlks; - loopRep->getExitingBlocks(exitingBlks); - - if (exitingBlks.size() == 0) { - BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); - if (dummyExitBlk != NULL) - retBlks.push_back(dummyExitBlk); - } - } - - // Remove unconditional branch instr. - // Add dummy exit block iff there are multiple returns. - - for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator - iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); - iterBlk != iterEndBlk; - ++iterBlk) { - BlockT *curBlk = *iterBlk; - removeUnconditionalBranch(curBlk); - removeRedundantConditionalBranch(curBlk); - if (CFGTraits::isReturnBlock(curBlk)) { - retBlks.push_back(curBlk); - } - assert(curBlk->succ_size() <= 2); - //assert(curBlk->size() > 0); - //removeEmptyBlock(curBlk) ?? - } //for - - if (retBlks.size() >= 2) { - addDummyExitBlock(retBlks); - changed = true; - } - - return changed; -} //CFGStructurizer::prepare - -template<class PassT> -bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass, - const AMDGPURegisterInfo * tri) { - passRep = &pass; - funcRep = &func; - TRI = tri; - - //func.RenumberBlocks(); - - //Assume reducible CFG... - if (DEBUGME) { - errs() << "AMDGPUCFGStructurizer::run\n"; - //errs() << func.getFunction()->getNameStr() << "\n"; - func.viewCFG(); - //func.viewCFGOnly(); - //func.dump(); - } - -#if 1 - //FIXME: gcc complains on this. - //domTree = &pass.getAnalysis<DomTreeT>(); - domTree = CFGTraits::getDominatorTree(pass); - if (DEBUGME) { - domTree->print(errs(), (const llvm::Module*)0); - } -#endif - - //FIXME: gcc complains on this. - //domTree = &pass.getAnalysis<DomTreeT>(); - postDomTree = CFGTraits::getPostDominatorTree(pass); - if (DEBUGME) { - postDomTree->print(errs()); - } - - //FIXME: gcc complains on this. - //loopInfo = &pass.getAnalysis<LoopInfoT>(); - loopInfo = CFGTraits::getLoopInfo(pass); - if (DEBUGME) { - errs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, errs()); - } - - orderBlocks(); -//#define STRESSTEST -#ifdef STRESSTEST - //Use the worse block ordering to test the algorithm. - ReverseVector(orderedBlks); -#endif - - if (DEBUGME) { - errs() << "Ordered blocks:\n"; - printOrderedBlocks(errs()); - } - int numIter = 0; - bool finish = false; - BlockT *curBlk; - bool makeProgress = false; - int numRemainedBlk = countActiveBlock(orderedBlks.begin(), - orderedBlks.end()); - - do { - ++numIter; - if (DEBUGME) { - errs() << "numIter = " << numIter - << ", numRemaintedBlk = " << numRemainedBlk << "\n"; - } - - typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator - iterBlk = orderedBlks.begin(); - typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator - iterBlkEnd = orderedBlks.end(); - - typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator - sccBeginIter = iterBlk; - BlockT *sccBeginBlk = NULL; - int sccNumBlk = 0; // The number of active blocks, init to a - // maximum possible number. - int sccNumIter; // Number of iteration in this SCC. - - while (iterBlk != iterBlkEnd) { - curBlk = *iterBlk; - - if (sccBeginBlk == NULL) { - sccBeginIter = iterBlk; - sccBeginBlk = curBlk; - sccNumIter = 0; - sccNumBlk = numRemainedBlk; // Init to maximum possible number. - if (DEBUGME) { - errs() << "start processing SCC" << getSCCNum(sccBeginBlk); - errs() << "\n"; - } - } - - if (!isRetiredBlock(curBlk)) { - patternMatch(curBlk); - } - - ++iterBlk; - - bool contNextScc = true; - if (iterBlk == iterBlkEnd - || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { - // Just finish one scc. - ++sccNumIter; - int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); - if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { - if (DEBUGME) { - errs() << "Can't reduce SCC " << getSCCNum(curBlk) - << ", sccNumIter = " << sccNumIter; - errs() << "doesn't make any progress\n"; - } - contNextScc = true; - } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { - sccNumBlk = sccRemainedNumBlk; - iterBlk = sccBeginIter; - contNextScc = false; - if (DEBUGME) { - errs() << "repeat processing SCC" << getSCCNum(curBlk) - << "sccNumIter = " << sccNumIter << "\n"; - func.viewCFG(); - //func.viewCFGOnly(); - } - } else { - // Finish the current scc. - contNextScc = true; - } - } else { - // Continue on next component in the current scc. - contNextScc = false; - } - - if (contNextScc) { - sccBeginBlk = NULL; - } - } //while, "one iteration" over the function. - - BlockT *entryBlk = FuncGTraits::nodes_begin(&func); - if (entryBlk->succ_size() == 0) { - finish = true; - if (DEBUGME) { - errs() << "Reduce to one block\n"; - } - } else { - int newnumRemainedBlk - = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); - // consider cloned blocks ?? - if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { - makeProgress = true; - numRemainedBlk = newnumRemainedBlk; - } else { - makeProgress = false; - if (DEBUGME) { - errs() << "No progress\n"; - } - } - } - } while (!finish && makeProgress); - - // Misc wrap up to maintain the consistency of the Function representation. - CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); - - // Detach retired Block, release memory. - for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), - iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { - if ((*iterMap).second && (*iterMap).second->isRetired) { - assert(((*iterMap).first)->getNumber() != -1); - if (DEBUGME) { - errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; - } - (*iterMap).first->eraseFromParent(); //Remove from the parent Function. - } - delete (*iterMap).second; - } - blockInfoMap.clear(); - - // clear loopLandInfoMap - for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), - iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { - delete (*iterMap).second; - } - loopLandInfoMap.clear(); - - if (DEBUGME) { - func.viewCFG(); - //func.dump(); - } - - if (!finish) { - assert(!"IRREDUCIBL_CF"); - } - - return true; -} //CFGStructurizer::run - -/// Print the ordered Blocks. -/// -template<class PassT> -void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) { - size_t i = 0; - for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator - iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); - iterBlk != iterBlkEnd; - ++iterBlk, ++i) { - os << "BB" << (*iterBlk)->getNumber(); - os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; - if (i != 0 && i % 10 == 0) { - os << "\n"; - } else { - os << " "; - } - } -} //printOrderedBlocks - -/// Compute the reversed DFS post order of Blocks -/// -template<class PassT> void CFGStructurizer<PassT>::orderBlocks() { - int sccNum = 0; - BlockT *bb; - for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep), - sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { - std::vector<BlockT *> &sccNext = *sccIter; - for (typename std::vector<BlockT *>::const_iterator - blockIter = sccNext.begin(), blockEnd = sccNext.end(); - blockIter != blockEnd; ++blockIter) { - bb = *blockIter; - orderedBlks.push_back(bb); - recordSccnum(bb, sccNum); - } - } - - //walk through all the block in func to check for unreachable - for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), - blockEnd1 = FuncGTraits::nodes_end(funcRep); - blockIter1 != blockEnd1; ++blockIter1) { - BlockT *bb = &(*blockIter1); - sccNum = getSCCNum(bb); - if (sccNum == INVALIDSCCNUM) { - errs() << "unreachable block BB" << bb->getNumber() << "\n"; - } - } //end of for -} //orderBlocks - -template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) { - int numMatch = 0; - int curMatch; - - if (DEBUGME) { - errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; - } - - while ((curMatch = patternMatchGroup(curBlk)) > 0) { - numMatch += curMatch; - } - - if (DEBUGME) { - errs() << "End patternMatch BB" << curBlk->getNumber() - << ", numMatch = " << numMatch << "\n"; - } - - return numMatch; -} //patternMatch - -template<class PassT> -int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) { - int numMatch = 0; - numMatch += serialPatternMatch(curBlk); - numMatch += ifPatternMatch(curBlk); - //numMatch += switchPatternMatch(curBlk); - numMatch += loopendPatternMatch(curBlk); - numMatch += loopPatternMatch(curBlk); - return numMatch; -}//patternMatchGroup - -template<class PassT> -int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) { - if (curBlk->succ_size() != 1) { - return 0; - } - - BlockT *childBlk = *curBlk->succ_begin(); - if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { - return 0; - } - - mergeSerialBlock(curBlk, childBlk); - ++numSerialPatternMatch; - return 1; -} //serialPatternMatch - -template<class PassT> -int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) { - //two edges - if (curBlk->succ_size() != 2) { - return 0; - } - - if (hasBackEdge(curBlk)) { - return 0; - } - - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); - if (branchInstr == NULL) { - return 0; - } - - assert(CFGTraits::isCondBranch(branchInstr)); - - BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); - BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); - BlockT *landBlk; - int cloned = 0; - - // TODO: Simplify - if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 - && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { - landBlk = *trueBlk->succ_begin(); - } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { - landBlk = NULL; - } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { - landBlk = falseBlk; - falseBlk = NULL; - } else if (falseBlk->succ_size() == 1 - && *falseBlk->succ_begin() == trueBlk) { - landBlk = trueBlk; - trueBlk = NULL; - } else if (falseBlk->succ_size() == 1 - && isSameloopDetachedContbreak(trueBlk, falseBlk)) { - landBlk = *falseBlk->succ_begin(); - } else if (trueBlk->succ_size() == 1 - && isSameloopDetachedContbreak(falseBlk, trueBlk)) { - landBlk = *trueBlk->succ_begin(); - } else { - return handleJumpintoIf(curBlk, trueBlk, falseBlk); - } - - // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the - // new BB created for landBlk==NULL may introduce new challenge to the - // reduction process. - if (landBlk != NULL && - ((trueBlk && trueBlk->pred_size() > 1) - || (falseBlk && falseBlk->pred_size() > 1))) { - cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); - } - - if (trueBlk && trueBlk->pred_size() > 1) { - trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); - ++cloned; - } - - if (falseBlk && falseBlk->pred_size() > 1) { - falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); - ++cloned; - } - - mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); - - ++numIfPatternMatch; - - numClonedBlock += cloned; - - return 1 + cloned; -} //ifPatternMatch - -template<class PassT> -int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) { - return 0; -} //switchPatternMatch - -template<class PassT> -int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - typename std::vector<LoopT *> nestedLoops; - while (loopRep) { - nestedLoops.push_back(loopRep); - loopRep = loopRep->getParentLoop(); - } - - if (nestedLoops.size() == 0) { - return 0; - } - - // Process nested loop outside->inside, so "continue" to a outside loop won't - // be mistaken as "break" of the current loop. - int num = 0; - for (typename std::vector<LoopT *>::reverse_iterator - iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); - iter != iterEnd; ++iter) { - loopRep = *iter; - - if (getLoopLandBlock(loopRep) != NULL) { - continue; - } - - BlockT *loopHeader = loopRep->getHeader(); - - int numBreak = loopbreakPatternMatch(loopRep, loopHeader); - - if (numBreak == -1) { - break; - } - - int numCont = loopcontPatternMatch(loopRep, loopHeader); - num += numBreak + numCont; - } - - return num; -} //loopendPatternMatch - -template<class PassT> -int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) { - if (curBlk->succ_size() != 0) { - return 0; - } - - int numLoop = 0; - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - while (loopRep && loopRep->getHeader() == curBlk) { - LoopLandInfo *loopLand = getLoopLandInfo(loopRep); - if (loopLand) { - BlockT *landBlk = loopLand->landBlk; - assert(landBlk); - if (!isRetiredBlock(landBlk)) { - mergeLooplandBlock(curBlk, loopLand); - ++numLoop; - } - } - loopRep = loopRep->getParentLoop(); - } - - numLoopPatternMatch += numLoop; - - return numLoop; -} //loopPatternMatch - -template<class PassT> -int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep, - BlockT *loopHeader) { - BlockTSmallerVector exitingBlks; - loopRep->getExitingBlocks(exitingBlks); - - if (DEBUGME) { - errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; - } - - if (exitingBlks.size() == 0) { - setLoopLandBlock(loopRep); - return 0; - } - - // Compute the corresponding exitBlks and exit block set. - BlockTSmallerVector exitBlks; - std::set<BlockT *> exitBlkSet; - for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), - iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { - BlockT *exitingBlk = *iter; - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); - exitBlks.push_back(exitBlk); - exitBlkSet.insert(exitBlk); //non-duplicate insert - } - - assert(exitBlkSet.size() > 0); - assert(exitBlks.size() == exitingBlks.size()); - - if (DEBUGME) { - errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; - } - - // Find exitLandBlk. - BlockT *exitLandBlk = NULL; - int numCloned = 0; - int numSerial = 0; - - if (exitBlkSet.size() == 1) - { - exitLandBlk = *exitBlkSet.begin(); - } else { - exitLandBlk = findNearestCommonPostDom(exitBlkSet); - - if (exitLandBlk == NULL) { - return -1; - } - - bool allInPath = true; - bool allNotInPath = true; - for (typename std::set<BlockT*>::const_iterator - iter = exitBlkSet.begin(), - iterEnd = exitBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *exitBlk = *iter; - - PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); - if (DEBUGME) { - errs() << "BB" << exitBlk->getNumber() - << " to BB" << exitLandBlk->getNumber() << " PathToKind=" - << pathKind << "\n"; - } - - allInPath = allInPath && (pathKind == SinglePath_InPath); - allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); - - if (!allInPath && !allNotInPath) { - if (DEBUGME) { - errs() << "singlePath check fail\n"; - } - return -1; - } - } // check all exit blocks - - if (allNotInPath) { -#if 1 - - // TODO: Simplify, maybe separate function? - //funcRep->viewCFG(); - LoopT *parentLoopRep = loopRep->getParentLoop(); - BlockT *parentLoopHeader = NULL; - if (parentLoopRep) - parentLoopHeader = parentLoopRep->getHeader(); - - if (exitLandBlk == parentLoopHeader && - (exitLandBlk = relocateLoopcontBlock(parentLoopRep, - loopRep, - exitBlkSet, - exitLandBlk)) != NULL) { - if (DEBUGME) { - errs() << "relocateLoopcontBlock success\n"; - } - } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, - exitingBlks, - exitBlks)) != NULL) { - if (DEBUGME) { - errs() << "insertEndbranchBlock success\n"; - } - } else { - if (DEBUGME) { - errs() << "loop exit fail\n"; - } - return -1; - } -#else - return -1; -#endif - } - - // Handle side entry to exit path. - exitBlks.clear(); - exitBlkSet.clear(); - for (typename BlockTSmallerVector::iterator iterExiting = - exitingBlks.begin(), - iterExitingEnd = exitingBlks.end(); - iterExiting != iterExitingEnd; ++iterExiting) { - BlockT *exitingBlk = *iterExiting; - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); - BlockT *newExitBlk = exitBlk; - - if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { - newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); - ++numCloned; - } - - numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); - - exitBlks.push_back(newExitBlk); - exitBlkSet.insert(newExitBlk); - } - - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), - iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit) { - BlockT *exitBlk = *iterExit; - numSerial += serialPatternMatch(exitBlk); - } - - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), - iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit) { - BlockT *exitBlk = *iterExit; - if (exitBlk->pred_size() > 1) { - if (exitBlk != exitLandBlk) { - return -1; - } - } else { - if (exitBlk != exitLandBlk && - (exitBlk->succ_size() != 1 || - *exitBlk->succ_begin() != exitLandBlk)) { - return -1; - } - } - } - } // else - - // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk); - exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); - - // Fold break into the breaking block. Leverage across level breaks. - assert(exitingBlks.size() == exitBlks.size()); - for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), - iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit, ++iterExiting) { - BlockT *exitBlk = *iterExit; - BlockT *exitingBlk = *iterExiting; - assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); - LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); - handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); - } - - int numBreak = static_cast<int>(exitingBlks.size()); - numLoopbreakPatternMatch += numBreak; - numClonedBlock += numCloned; - return numBreak + numSerial + numCloned; -} //loopbreakPatternMatch - -template<class PassT> -int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep, - BlockT *loopHeader) { - int numCont = 0; - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk; - for (typename InvBlockGTraits::ChildIteratorType iter = - InvBlockGTraits::child_begin(loopHeader), - iterEnd = InvBlockGTraits::child_end(loopHeader); - iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - if (loopRep->contains(curBlk)) { - handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), - loopHeader, loopRep); - contBlk.push_back(curBlk); - ++numCont; - } - } - - for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator - iter = contBlk.begin(), iterEnd = contBlk.end(); - iter != iterEnd; ++iter) { - (*iter)->removeSuccessor(loopHeader); - } - - numLoopcontPatternMatch += numCont; - - return numCont; -} //loopcontPatternMatch - - -template<class PassT> -bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk, - BlockT *src2Blk) { - // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the - // same loop with LoopLandInfo without explicitly keeping track of - // loopContBlks and loopBreakBlks, this is a method to get the information. - // - if (src1Blk->succ_size() == 0) { - LoopT *loopRep = loopInfo->getLoopFor(src1Blk); - if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - if (theEntry != NULL) { - if (DEBUGME) { - errs() << "isLoopContBreakBlock yes src1 = BB" - << src1Blk->getNumber() - << " src2 = BB" << src2Blk->getNumber() << "\n"; - } - return true; - } - } - } - return false; -} //isSameloopDetachedContbreak - -template<class PassT> -int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk) { - int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); - if (num == 0) { - if (DEBUGME) { - errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; - } - num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); - } - return num; -} - -template<class PassT> -int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk) { - int num = 0; - BlockT *downBlk; - - //trueBlk could be the common post dominator - downBlk = trueBlk; - - if (DEBUGME) { - errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() - << " true = BB" << trueBlk->getNumber() - << ", numSucc=" << trueBlk->succ_size() - << " false = BB" << falseBlk->getNumber() << "\n"; - } - - while (downBlk) { - if (DEBUGME) { - errs() << "check down = BB" << downBlk->getNumber(); - } - - if (//postDomTree->dominates(downBlk, falseBlk) && - singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { - if (DEBUGME) { - errs() << " working\n"; - } - - num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); - num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); - - numClonedBlock += num; - num += serialPatternMatch(*headBlk->succ_begin()); - num += serialPatternMatch(*(++headBlk->succ_begin())); - num += ifPatternMatch(headBlk); - assert(num > 0); // - - break; - } - if (DEBUGME) { - errs() << " not working\n"; - } - downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; - } // walk down the postDomTree - - return num; -} //handleJumpintoIf - -template<class PassT> -void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT *landBlk, - bool detail) { - errs() << "head = BB" << headBlk->getNumber() - << " size = " << headBlk->size(); - if (detail) { - errs() << "\n"; - headBlk->print(errs()); - errs() << "\n"; - } - - if (trueBlk) { - errs() << ", true = BB" << trueBlk->getNumber() << " size = " - << trueBlk->size() << " numPred = " << trueBlk->pred_size(); - if (detail) { - errs() << "\n"; - trueBlk->print(errs()); - errs() << "\n"; - } - } - if (falseBlk) { - errs() << ", false = BB" << falseBlk->getNumber() << " size = " - << falseBlk->size() << " numPred = " << falseBlk->pred_size(); - if (detail) { - errs() << "\n"; - falseBlk->print(errs()); - errs() << "\n"; - } - } - if (landBlk) { - errs() << ", land = BB" << landBlk->getNumber() << " size = " - << landBlk->size() << " numPred = " << landBlk->pred_size(); - if (detail) { - errs() << "\n"; - landBlk->print(errs()); - errs() << "\n"; - } - } - - errs() << "\n"; -} //showImproveSimpleJumpintoIf - -template<class PassT> -int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT **plandBlk) { - bool migrateTrue = false; - bool migrateFalse = false; - - BlockT *landBlk = *plandBlk; - - assert((trueBlk == NULL || trueBlk->succ_size() <= 1) - && (falseBlk == NULL || falseBlk->succ_size() <= 1)); - - if (trueBlk == falseBlk) { - return 0; - } - -#if 0 - if (DEBUGME) { - errs() << "improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - } -#endif - - // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0; - // May consider the # landBlk->pred_size() as it represents the number of - // assignment initReg = .. needed to insert. - migrateTrue = needMigrateBlock(trueBlk); - migrateFalse = needMigrateBlock(falseBlk); - - if (!migrateTrue && !migrateFalse) { - return 0; - } - - // If we need to migrate either trueBlk and falseBlk, migrate the rest that - // have more than one predecessors. without doing this, its predecessor - // rather than headBlk will have undefined value in initReg. - if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { - migrateTrue = true; - } - if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { - migrateFalse = true; - } - - if (DEBUGME) { - errs() << "before improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); - } - - // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk - // - // new: headBlk => if () {initReg = 1; org trueBlk branch} else - // {initReg = 0; org falseBlk branch } - // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} - // => org landBlk - // if landBlk->pred_size() > 2, put the about if-else inside - // if (initReg !=2) {...} - // - // add initReg = initVal to headBlk - - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - unsigned initReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - if (!migrateTrue || !migrateFalse) { - int initVal = migrateTrue ? 0 : 1; - CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); - } - - int numNewBlk = 0; - - if (landBlk == NULL) { - landBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(landBlk); //insert to function - - if (trueBlk) { - trueBlk->addSuccessor(landBlk); - } else { - headBlk->addSuccessor(landBlk); - } - - if (falseBlk) { - falseBlk->addSuccessor(landBlk); - } else { - headBlk->addSuccessor(landBlk); - } - - numNewBlk ++; - } - - bool landBlkHasOtherPred = (landBlk->pred_size() > 2); - - //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" - typename BlockT::iterator insertPos = - CFGTraits::getInstrPos - (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); - - if (landBlkHasOtherPred) { - unsigned immReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); - unsigned cmpResReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - - CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, - initReg, immReg); - CFGTraits::insertCondBranchBefore(landBlk, insertPos, - AMDGPU::IF_LOGICALZ_i32, passRep, - cmpResReg, DebugLoc()); - } - - CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32, - passRep, initReg, DebugLoc()); - - if (migrateTrue) { - migrateInstruction(trueBlk, landBlk, insertPos); - // need to uncondionally insert the assignment to ensure a path from its - // predecessor rather than headBlk has valid value in initReg if - // (initVal != 1). - CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); - } - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); - - if (migrateFalse) { - migrateInstruction(falseBlk, landBlk, insertPos); - // need to uncondionally insert the assignment to ensure a path from its - // predecessor rather than headBlk has valid value in initReg if - // (initVal != 0) - CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); - } - //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); - - if (landBlkHasOtherPred) { - // add endif - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); - - // put initReg = 2 to other predecessors of landBlk - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), - predIterEnd = landBlk->pred_end(); predIter != predIterEnd; - ++predIter) { - BlockT *curBlk = *predIter; - if (curBlk != trueBlk && curBlk != falseBlk) { - CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); - } - } //for - } - if (DEBUGME) { - errs() << "result from improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); - } - - // update landBlk - *plandBlk = landBlk; - - return numNewBlk; -} //improveSimpleJumpintoIf - -template<class PassT> -void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk, - LoopT *exitingLoop, - BlockT *exitBlk, - LoopT *exitLoop, - BlockT *landBlk) { - if (DEBUGME) { - errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) - << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; - } - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - - RegiT initReg = INVALIDREGNUM; - if (exitingLoop != exitLoop) { - initReg = static_cast<int> - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(initReg != INVALIDREGNUM); - addLoopBreakInitReg(exitLoop, initReg); - while (exitingLoop != exitLoop && exitingLoop) { - addLoopBreakOnReg(exitingLoop, initReg); - exitingLoop = exitingLoop->getParentLoop(); - } - assert(exitingLoop == exitLoop); - } - - mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); - -} //handleLoopbreak - -template<class PassT> -void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk, - LoopT *contingLoop, - BlockT *contBlk, - LoopT *contLoop) { - if (DEBUGME) { - errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() - << " header = BB" << contBlk->getNumber() << "\n"; - - errs() << "Trying to continue loop-depth = " - << getLoopDepth(contLoop) - << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; - } - - RegiT initReg = INVALIDREGNUM; - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (contingLoop != contLoop) { - initReg = static_cast<int> - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(initReg != INVALIDREGNUM); - addLoopContInitReg(contLoop, initReg); - while (contingLoop && contingLoop->getParentLoop() != contLoop) { - addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg - contingLoop = contingLoop->getParentLoop(); - } - assert(contingLoop && contingLoop->getParentLoop() == contLoop); - addLoopContOnReg(contingLoop, initReg); - } - - settleLoopcontBlock(contingBlk, contBlk, initReg); - //contingBlk->removeSuccessor(loopHeader); -} //handleLoopcontBlock - -template<class PassT> -void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { - if (DEBUGME) { - errs() << "serialPattern BB" << dstBlk->getNumber() - << " <= BB" << srcBlk->getNumber() << "\n"; - } - //removeUnconditionalBranch(dstBlk); - dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end()); - - dstBlk->removeSuccessor(srcBlk); - CFGTraits::cloneSuccessorList(dstBlk, srcBlk); - - removeSuccessor(srcBlk); - retireBlock(dstBlk, srcBlk); -} //mergeSerialBlock - -template<class PassT> -void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr, - BlockT *curBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT *landBlk) { - if (DEBUGME) { - errs() << "ifPattern BB" << curBlk->getNumber(); - errs() << "{ "; - if (trueBlk) { - errs() << "BB" << trueBlk->getNumber(); - } - errs() << " } else "; - errs() << "{ "; - if (falseBlk) { - errs() << "BB" << falseBlk->getNumber(); - } - errs() << " }\n "; - errs() << "landBlock: "; - if (landBlk == NULL) { - errs() << "NULL"; - } else { - errs() << "BB" << landBlk->getNumber(); - } - errs() << "\n"; - } - - int oldOpcode = branchInstr->getOpcode(); - DebugLoc branchDL = branchInstr->getDebugLoc(); - -// transform to -// if cond -// trueBlk -// else -// falseBlk -// endif -// landBlk - - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(curBlk, branchInstr); - CFGTraits::insertCondBranchBefore(branchInstrPos, - CFGTraits::getBranchNzeroOpcode(oldOpcode), - passRep, - branchDL); - - if (trueBlk) { - curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end()); - curBlk->removeSuccessor(trueBlk); - if (landBlk && trueBlk->succ_size()!=0) { - trueBlk->removeSuccessor(landBlk); - } - retireBlock(curBlk, trueBlk); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); - - if (falseBlk) { - curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk), - falseBlk->end()); - curBlk->removeSuccessor(falseBlk); - if (landBlk && falseBlk->succ_size() != 0) { - falseBlk->removeSuccessor(landBlk); - } - retireBlock(curBlk, falseBlk); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); - - //curBlk->remove(branchInstrPos); - branchInstr->eraseFromParent(); - - if (landBlk && trueBlk && falseBlk) { - curBlk->addSuccessor(landBlk); - } - -} //mergeIfthenelseBlock - -template<class PassT> -void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk, - LoopLandInfo *loopLand) { - BlockT *landBlk = loopLand->landBlk; - - if (DEBUGME) { - errs() << "loopPattern header = BB" << dstBlk->getNumber() - << " land = BB" << landBlk->getNumber() << "\n"; - } - - // Loop contInitRegs are init at the beginning of the loop. - for (typename std::set<RegiT>::const_iterator iter = - loopLand->contInitRegs.begin(), - iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } - - /* we last inserterd the DebugLoc in the - * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. - * search for the DebugLoc in the that statement. - * if not found, we have to insert the empty/default DebugLoc */ - InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); - DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); - - CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); - // Loop breakInitRegs are init before entering the loop. - for (typename std::set<RegiT>::const_iterator iter = - loopLand->breakInitRegs.begin(), - iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) - { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } - // Loop endbranchInitRegs are init before entering the loop. - for (typename std::set<RegiT>::const_iterator iter = - loopLand->endbranchInitRegs.begin(), - iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } - - /* we last inserterd the DebugLoc in the continue statement in the current dstBlk - * search for the DebugLoc in the continue statement. - * if not found, we have to insert the empty/default DebugLoc */ - InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); - DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); - - CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); - // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this - // loop. - for (typename std::set<RegiT>::const_iterator iter = - loopLand->breakOnRegs.begin(), - iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep, - *iter); - } - - // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this - // loop. - for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(), - iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, - passRep, *iter); - } - - dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); - - for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), - iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. - } - - removeSuccessor(landBlk); - retireBlock(dstBlk, landBlk); -} //mergeLooplandBlock - -template<class PassT> -void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) -{ - while (I--) { - if (I->getOpcode() == AMDGPU::PRED_X) { - switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) { - case OPCODE_IS_ZERO_INT: - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); - return; - case OPCODE_IS_NOT_ZERO_INT: - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); - return; - case OPCODE_IS_ZERO: - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); - return; - case OPCODE_IS_NOT_ZERO: - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO); - return; - default: - assert(0 && "PRED_X Opcode invalid!"); - } - } - } -} - -template<class PassT> -void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk, - BlockT *exitBlk, - BlockT *exitLandBlk, - RegiT setReg) { - if (DEBUGME) { - errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() - << " exit = BB" << exitBlk->getNumber() - << " land = BB" << exitLandBlk->getNumber() << "\n"; - } - - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); - assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); - - DebugLoc DL = branchInstr->getDebugLoc(); - - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); - int oldOpcode = branchInstr->getOpcode(); - - // transform exitingBlk to - // if ( ) { - // exitBlk (if exitBlk != exitLandBlk) - // setReg = 1 - // break - // }endif - // successor = {orgSuccessor(exitingBlk) - exitBlk} - - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(exitingBlk, branchInstr); - - if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { - //break_logical - - if (trueBranch != exitBlk) { - reversePredicateSetter(branchInstrPos); - } - int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); - CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); - } else { - if (trueBranch != exitBlk) { - reversePredicateSetter(branchInstr); - } - int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); - CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); - if (exitBlk != exitLandBlk) { - //splice is insert-before ... - exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), - exitBlk->end()); - } - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); - } //if_logical - - //now branchInst can be erase safely - //exitingBlk->eraseFromParent(branchInstr); - branchInstr->eraseFromParent(); - - //now take care of successors, retire blocks - exitingBlk->removeSuccessor(exitBlk); - if (exitBlk != exitLandBlk) { - //splice is insert-before ... - exitBlk->removeSuccessor(exitLandBlk); - retireBlock(exitingBlk, exitBlk); - } - -} //mergeLoopbreakBlock - -template<class PassT> -void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk, - BlockT *contBlk, - RegiT setReg) { - if (DEBUGME) { - errs() << "settleLoopcontBlock conting = BB" - << contingBlk->getNumber() - << ", cont = BB" << contBlk->getNumber() << "\n"; - } - - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); - if (branchInstr) { - assert(CFGTraits::isCondBranch(branchInstr)); - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(contingBlk, branchInstr); - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); - int oldOpcode = branchInstr->getOpcode(); - DebugLoc DL = branchInstr->getDebugLoc(); - - // transform contingBlk to - // if () { - // move instr after branchInstr - // continue - // or - // setReg = 1 - // break - // }endif - // successor = {orgSuccessor(contingBlk) - loopHeader} - - bool useContinueLogical = - (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); - - if (useContinueLogical == false) - { - int branchOpcode = - trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) - : CFGTraits::getBranchZeroOpcode(oldOpcode); - - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); - - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); - } else { - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); - } - - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); - } else { - int branchOpcode = - trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) - : CFGTraits::getContinueZeroOpcode(oldOpcode); - - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); - } - - //contingBlk->eraseFromParent(branchInstr); - branchInstr->eraseFromParent(); - } else { - /* if we've arrived here then we've already erased the branch instruction - * travel back up the basic block to see the last reference of our debug location - * we've just inserted that reference here so it should be representative */ - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); - } else { - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); - } - } //else - -} //settleLoopcontBlock - -// BBs in exitBlkSet are determined as in break-path for loopRep, -// before we can put code for BBs as inside loop-body for loopRep -// check whether those BBs are determined as cont-BB for parentLoopRep -// earlier. -// If so, generate a new BB newBlk -// (1) set newBlk common successor of BBs in exitBlkSet -// (2) change the continue-instr in BBs in exitBlkSet to break-instr -// (3) generate continue-instr in newBlk -// -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep, - LoopT *loopRep, - std::set<BlockT *> &exitBlkSet, - BlockT *exitLandBlk) { - std::set<BlockT *> endBlkSet; - -// BlockT *parentLoopHead = parentLoopRep->getHeader(); - - - for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(), - iterEnd = exitBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *exitBlk = *iter; - BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); - - if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) - return NULL; - - endBlkSet.insert(endBlk); - } - - BlockT *newBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(newBlk); //insert to function - CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); - SHOWNEWBLK(newBlk, "New continue block: "); - - for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(), - iterEnd = endBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *endBlk = *iter; - InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); - if (contInstr) { - contInstr->eraseFromParent(); - } - endBlk->addSuccessor(newBlk); - if (DEBUGME) { - errs() << "Add new continue Block to BB" - << endBlk->getNumber() << " successors\n"; - } - } - - return newBlk; -} //relocateLoopcontBlock - - -// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as -// LoopLandBlock. This BB branch on the loop endBranchInit register to the -// pathes corresponding to the loop exiting branches. - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep, - BlockTSmallerVector &exitingBlks, - BlockTSmallerVector &exitBlks) { - const AMDGPUInstrInfo *tii = - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - - RegiT endBranchReg = static_cast<int> - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(endBranchReg >= 0); - - // reg = 0 before entering the loop - addLoopEndbranchInitReg(loopRep, endBranchReg); - - uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size()); - assert(numBlks >=2 && numBlks == exitBlks.size()); - - BlockT *preExitingBlk = exitingBlks[0]; - BlockT *preExitBlk = exitBlks[0]; - BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(preBranchBlk); //insert to function - SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); - - BlockT *newLandBlk = preBranchBlk; - - CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, - newLandBlk); - preExitingBlk->removeSuccessor(preExitBlk); - preExitingBlk->addSuccessor(newLandBlk); - - //it is redundant to add reg = 0 to exitingBlks[0] - - // For 1..n th exiting path (the last iteration handles two pathes) create the - // branch to the previous path and the current path. - for (uint32_t i = 1; i < numBlks; ++i) { - BlockT *curExitingBlk = exitingBlks[i]; - BlockT *curExitBlk = exitBlks[i]; - BlockT *curBranchBlk; - - if (i == numBlks - 1) { - curBranchBlk = curExitBlk; - } else { - curBranchBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(curBranchBlk); //insert to function - SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); - } - - // Add reg = i to exitingBlks[i]. - CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, - endBranchReg, i); - - // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge - // (exitingBlks[i], newLandBlk). - CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, - newLandBlk); - curExitingBlk->removeSuccessor(curExitBlk); - curExitingBlk->addSuccessor(newLandBlk); - - // add to preBranchBlk the branch instruction: - // if (endBranchReg == preVal) - // preExitBlk - // else - // curBranchBlk - // - // preValReg = i - 1 - - DebugLoc DL; - RegiT preValReg = static_cast<int> - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - - preBranchBlk->insert(preBranchBlk->begin(), - tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, - i - 1)); - - // condResReg = (endBranchReg == preValReg) - RegiT condResReg = static_cast<int> - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) - .addReg(endBranchReg).addReg(preValReg); - - BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) - .addMBB(preExitBlk).addReg(condResReg); - - preBranchBlk->addSuccessor(preExitBlk); - preBranchBlk->addSuccessor(curBranchBlk); - - // Update preExitingBlk, preExitBlk, preBranchBlk. - preExitingBlk = curExitingBlk; - preExitBlk = curExitBlk; - preBranchBlk = curBranchBlk; - - } //end for 1 .. n blocks - - return newLandBlk; -} //addLoopEndbranchBlock - -template<class PassT> -typename CFGStructurizer<PassT>::PathToKind -CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, - bool allowSideEntry) { - assert(dstBlk); - - if (srcBlk == dstBlk) { - return SinglePath_InPath; - } - - while (srcBlk && srcBlk->succ_size() == 1) { - srcBlk = *srcBlk->succ_begin(); - if (srcBlk == dstBlk) { - return SinglePath_InPath; - } - - if (!allowSideEntry && srcBlk->pred_size() > 1) { - return Not_SinglePath; - } - } - - if (srcBlk && srcBlk->succ_size()==0) { - return SinglePath_NotInPath; - } - - return Not_SinglePath; -} //singlePathTo - -// If there is a single path from srcBlk to dstBlk, return the last block before -// dstBlk If there is a single path from srcBlk->end without dstBlk, return the -// last block in the path Otherwise, return NULL -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, - bool allowSideEntry) { - assert(dstBlk); - - if (srcBlk == dstBlk) { - return srcBlk; - } - - if (srcBlk->succ_size() == 0) { - return srcBlk; - } - - while (srcBlk && srcBlk->succ_size() == 1) { - BlockT *preBlk = srcBlk; - - srcBlk = *srcBlk->succ_begin(); - if (srcBlk == NULL) { - return preBlk; - } - - if (!allowSideEntry && srcBlk->pred_size() > 1) { - return NULL; - } - } - - if (srcBlk && srcBlk->succ_size()==0) { - return srcBlk; - } - - return NULL; - -} //singlePathEnd - -template<class PassT> -int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, - BlockT *dstBlk) { - int cloned = 0; - assert(preBlk->isSuccessor(srcBlk)); - while (srcBlk && srcBlk != dstBlk) { - assert(srcBlk->succ_size() == 1); - if (srcBlk->pred_size() > 1) { - srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); - ++cloned; - } - - preBlk = srcBlk; - srcBlk = *srcBlk->succ_begin(); - } - - return cloned; -} //cloneOnSideEntryTo - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk, - BlockT *predBlk) { - assert(predBlk->isSuccessor(curBlk) && - "succBlk is not a prececessor of curBlk"); - - BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions - CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); - //srcBlk, oldBlk, newBlk - - predBlk->removeSuccessor(curBlk); - predBlk->addSuccessor(cloneBlk); - - // add all successor to cloneBlk - CFGTraits::cloneSuccessorList(cloneBlk, curBlk); - - numClonedInstr += curBlk->size(); - - if (DEBUGME) { - errs() << "Cloned block: " << "BB" - << curBlk->getNumber() << "size " << curBlk->size() << "\n"; - } - - SHOWNEWBLK(cloneBlk, "result of Cloned block: "); - - return cloneBlk; -} //cloneBlockForPredecessor - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep, - BlockT *exitingBlk) { - BlockT *exitBlk = NULL; - - for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), - iterSuccEnd = exitingBlk->succ_end(); - iterSucc != iterSuccEnd; ++iterSucc) { - BlockT *curBlk = *iterSucc; - if (!loopRep->contains(curBlk)) { - assert(exitBlk == NULL); - exitBlk = curBlk; - } - } - - assert(exitBlk != NULL); - - return exitBlk; -} //exitingBlock2ExitBlock - -template<class PassT> -void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk, - BlockT *dstBlk, - InstrIterator insertPos) { - InstrIterator spliceEnd; - //look for the input branchinstr, not the AMDGPU branchinstr - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); - if (branchInstr == NULL) { - if (DEBUGME) { - errs() << "migrateInstruction don't see branch instr\n" ; - } - spliceEnd = srcBlk->end(); - } else { - if (DEBUGME) { - errs() << "migrateInstruction see branch instr\n" ; - branchInstr->dump(); - } - spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); - } - if (DEBUGME) { - errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() - << "srcSize = " << srcBlk->size() << "\n"; - } - - //splice insert before insertPos - dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); - - if (DEBUGME) { - errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() - << "srcSize = " << srcBlk->size() << "\n"; - } -} //migrateInstruction - -// normalizeInfiniteLoopExit change -// B1: -// uncond_br LoopHeader -// -// to -// B1: -// cond_br 1 LoopHeader dummyExit -// and return the newly added dummy exit block -// -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) { - BlockT *loopHeader; - BlockT *loopLatch; - loopHeader = LoopRep->getHeader(); - loopLatch = LoopRep->getLoopLatch(); - BlockT *dummyExitBlk = NULL; - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (loopHeader!=NULL && loopLatch!=NULL) { - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); - if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { - dummyExitBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(dummyExitBlk); //insert to function - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); - - if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; - - typename BlockT::iterator insertPos = - CFGTraits::getInstrPos(loopLatch, branchInstr); - unsigned immReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); - InstrT *newInstr = - CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); - MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false); - - SHOWNEWINSTR(newInstr); - - branchInstr->eraseFromParent(); - loopLatch->addSuccessor(dummyExitBlk); - } - } - - return dummyExitBlk; -} //normalizeInfiniteLoopExit - -template<class PassT> -void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) { - InstrT *branchInstr; - - // I saw two unconditional branch in one basic block in example - // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. - while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) - && CFGTraits::isUncondBranch(branchInstr)) { - if (DEBUGME) { - errs() << "Removing unconditional branch instruction" ; - branchInstr->dump(); - } - branchInstr->eraseFromParent(); - } -} //removeUnconditionalBranch - -template<class PassT> -void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) { - if (srcBlk->succ_size() == 2) { - BlockT *blk1 = *srcBlk->succ_begin(); - BlockT *blk2 = *(++srcBlk->succ_begin()); - - if (blk1 == blk2) { - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); - assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); - if (DEBUGME) { - errs() << "Removing unneeded conditional branch instruction" ; - branchInstr->dump(); - } - branchInstr->eraseFromParent(); - SHOWNEWBLK(blk1, "Removing redundant successor"); - srcBlk->removeSuccessor(blk1); - } - } -} //removeRedundantConditionalBranch - -template<class PassT> -void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*, - DEFAULT_VEC_SLOTS> &retBlks) { - BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(dummyExitBlk); //insert to function - CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); - - for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter = - retBlks.begin(), - iterEnd = retBlks.end(); iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); - if (curInstr) { - curInstr->eraseFromParent(); - } -#if 0 - if (curBlk->size()==0 && curBlk->pred_size() == 1) { - if (DEBUGME) { - errs() << "Replace empty block BB" << curBlk->getNumber() - << " with dummyExitBlock\n"; - } - BlockT *predb = *curBlk->pred_begin(); - predb->removeSuccessor(curBlk); - curBlk = predb; - } //handle empty curBlk -#endif - curBlk->addSuccessor(dummyExitBlk); - if (DEBUGME) { - errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() - << " successors\n"; - } - } //for - - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); -} //addDummyExitBlock - -template<class PassT> -void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) { - while (srcBlk->succ_size()) { - srcBlk->removeSuccessor(*srcBlk->succ_begin()); - } -} - -template<class PassT> -void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) { - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; - - if (srcBlkInfo == NULL) { - srcBlkInfo = new BlockInfo(); - } - - srcBlkInfo->sccNum = sccNum; -} - -template<class PassT> -int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) { - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; - return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; -} - -template<class PassT> -void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { - if (DEBUGME) { - errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; - } - - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; - - if (srcBlkInfo == NULL) { - srcBlkInfo = new BlockInfo(); - } - - srcBlkInfo->isRetired = true; - //int i = srcBlk->succ_size(); - //int j = srcBlk->pred_size(); - assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 - && "can't retire block yet"); -} - -template<class PassT> -bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) { - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; - return (srcBlkInfo && srcBlkInfo->isRetired); -} - -template<class PassT> -bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - while (loopRep && loopRep->getHeader() == curBlk) { - LoopLandInfo *loopLand = getLoopLandInfo(loopRep); - - if(loopLand == NULL) - return true; - - BlockT *landBlk = loopLand->landBlk; - assert(landBlk); - if (!isRetiredBlock(landBlk)) { - return true; - } - - loopRep = loopRep->getParentLoop(); - } - - return false; -} //isActiveLoophead - -template<class PassT> -bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) { - const unsigned blockSizeThreshold = 30; - const unsigned cloneInstrThreshold = 100; - - bool multiplePreds = blk && (blk->pred_size() > 1); - - if(!multiplePreds) - return false; - - unsigned blkSize = blk->size(); - return ((blkSize > blockSizeThreshold) - && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); -} //needMigrateBlock - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, - BlockTSmallerVector &exitBlks, - std::set<BlockT *> &exitBlkSet) { - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks - - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), - predIterEnd = landBlk->pred_end(); - predIter != predIterEnd; ++predIter) { - BlockT *curBlk = *predIter; - if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { - inpathBlks.push_back(curBlk); - } - } //for - - //if landBlk has predecessors that are not in the given loop, - //create a new block - BlockT *newLandBlk = landBlk; - if (inpathBlks.size() != landBlk->pred_size()) { - newLandBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(newLandBlk); //insert to function - newLandBlk->addSuccessor(landBlk); - for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter = - inpathBlks.begin(), - iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); - //srcBlk, oldBlk, newBlk - curBlk->removeSuccessor(landBlk); - curBlk->addSuccessor(newLandBlk); - } - for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { - if (exitBlks[i] == landBlk) { - exitBlks[i] = newLandBlk; - } - } - SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); - } - - setLoopLandBlock(loopRep, newLandBlk); - - return newLandBlk; -} // recordLoopbreakLand - -template<class PassT> -void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - assert(theEntry->landBlk == NULL); - - if (blk == NULL) { - blk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(blk); //insert to function - SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); - } - - theEntry->landBlk = blk; - - if (DEBUGME) { - errs() << "setLoopLandBlock loop-header = BB" - << loopRep->getHeader()->getNumber() - << " landing-block = BB" << blk->getNumber() << "\n"; - } -} // setLoopLandBlock - -template<class PassT> -void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - - theEntry->breakOnRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopBreakOnReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopBreakOnReg - -template<class PassT> -void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->contOnRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopContOnReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopContOnReg - -template<class PassT> -void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->breakInitRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopBreakInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopBreakInitReg - -template<class PassT> -void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->contInitRegs.insert(regNum); - - if (DEBUGME) { - errs() << "addLoopContInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopContInitReg - -template<class PassT> -void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep, - RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->endbranchInitRegs.insert(regNum); - - if (DEBUGME) - { - errs() << "addLoopEndbranchInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - } -} // addLoopEndbranchInitReg - -template<class PassT> -typename CFGStructurizer<PassT>::LoopLandInfo * -CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - return theEntry; -} // getLoopLandInfo - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - return theEntry ? theEntry->landBlk : NULL; -} // getLoopLandBlock - - -template<class PassT> -bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - if (loopRep == NULL) - return false; - - BlockT *loopHeader = loopRep->getHeader(); - - return curBlk->isSuccessor(loopHeader); - -} //hasBackEdge - -template<class PassT> -unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) { - return loopRep ? loopRep->getLoopDepth() : 0; -} //getLoopDepth - -template<class PassT> -int CFGStructurizer<PassT>::countActiveBlock -(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart, - typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) { - int count = 0; - while (iterStart != iterEnd) { - if (!isRetiredBlock(*iterStart)) { - ++count; - } - ++iterStart; - } - - return count; -} //countActiveBlock - -// This is work around solution for findNearestCommonDominator not avaiable to -// post dom a proper fix should go to Dominators.h. - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT* -CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { - - if (postDomTree->dominates(blk1, blk2)) { - return blk1; - } - if (postDomTree->dominates(blk2, blk1)) { - return blk2; - } - - DomTreeNodeT *node1 = postDomTree->getNode(blk1); - DomTreeNodeT *node2 = postDomTree->getNode(blk2); - - // Handle newly cloned node. - if (node1 == NULL && blk1->succ_size() == 1) { - return findNearestCommonPostDom(*blk1->succ_begin(), blk2); - } - if (node2 == NULL && blk2->succ_size() == 1) { - return findNearestCommonPostDom(blk1, *blk2->succ_begin()); - } - - if (node1 == NULL || node2 == NULL) { - return NULL; - } - - node1 = node1->getIDom(); - while (node1) { - if (postDomTree->dominates(node1, node2)) { - return node1->getBlock(); - } - node1 = node1->getIDom(); - } - - return NULL; -} - -template<class PassT> -typename CFGStructurizer<PassT>::BlockT * -CFGStructurizer<PassT>::findNearestCommonPostDom -(typename std::set<BlockT *> &blks) { - BlockT *commonDom; - typename std::set<BlockT *>::const_iterator iter = blks.begin(); - typename std::set<BlockT *>::const_iterator iterEnd = blks.end(); - for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { - BlockT *curBlk = *iter; - if (curBlk != commonDom) { - commonDom = findNearestCommonPostDom(curBlk, commonDom); - } - } - - if (DEBUGME) { - errs() << "Common post dominator for exit blocks is "; - if (commonDom) { - errs() << "BB" << commonDom->getNumber() << "\n"; - } else { - errs() << "NULL\n"; - } - } - - return commonDom; -} //findNearestCommonPostDom - -} //end namespace llvm - -//todo: move-end - - -//===----------------------------------------------------------------------===// -// -// CFGStructurizer for AMDGPU -// -//===----------------------------------------------------------------------===// - - -using namespace llvmCFGStruct; - -namespace llvm -{ -class AMDGPUCFGStructurizer : public MachineFunctionPass -{ -public: - typedef MachineInstr InstructionType; - typedef MachineFunction FunctionType; - typedef MachineBasicBlock BlockType; - typedef MachineLoopInfo LoopinfoType; - typedef MachineDominatorTree DominatortreeType; - typedef MachinePostDominatorTree PostDominatortreeType; - typedef MachineDomTreeNode DomTreeNodeType; - typedef MachineLoop LoopType; - -protected: - TargetMachine &TM; - const TargetInstrInfo *TII; - const AMDGPURegisterInfo *TRI; - -public: - AMDGPUCFGStructurizer(char &pid, TargetMachine &tm); - const TargetInstrInfo *getTargetInstrInfo() const; - //bool runOnMachineFunction(MachineFunction &F); - -private: - -}; //end of class AMDGPUCFGStructurizer - -//char AMDGPUCFGStructurizer::ID = 0; -} //end of namespace llvm -AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm - ) -: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), - TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo()) - ) { -} - -const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const { - return TII; -} -//===----------------------------------------------------------------------===// -// -// CFGPrepare -// -//===----------------------------------------------------------------------===// - - -using namespace llvmCFGStruct; - -namespace llvm -{ -class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer -{ -public: - static char ID; - -public: - AMDGPUCFGPrepare(TargetMachine &tm); - - virtual const char *getPassName() const; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnMachineFunction(MachineFunction &F); - -private: - -}; //end of class AMDGPUCFGPrepare - -char AMDGPUCFGPrepare::ID = 0; -} //end of namespace llvm - -AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) - : AMDGPUCFGStructurizer(ID, tm ) -{ -} -const char *AMDGPUCFGPrepare::getPassName() const { - return "AMD IL Control Flow Graph Preparation Pass"; -} - -void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addRequired<MachineFunctionAnalysis>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); -} - -//===----------------------------------------------------------------------===// -// -// CFGPerform -// -//===----------------------------------------------------------------------===// - - -using namespace llvmCFGStruct; - -namespace llvm -{ -class AMDGPUCFGPerform : public AMDGPUCFGStructurizer -{ -public: - static char ID; - -public: - AMDGPUCFGPerform(TargetMachine &tm); - virtual const char *getPassName() const; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &F); - -private: - -}; //end of class AMDGPUCFGPerform - -char AMDGPUCFGPerform::ID = 0; -} //end of namespace llvm - - AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) -: AMDGPUCFGStructurizer(ID, tm) -{ -} - -const char *AMDGPUCFGPerform::getPassName() const { - return "AMD IL Control Flow Graph structurizer Pass"; -} - -void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addRequired<MachineFunctionAnalysis>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachinePostDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); -} - -//===----------------------------------------------------------------------===// -// -// CFGStructTraits<AMDGPUCFGStructurizer> -// -//===----------------------------------------------------------------------===// - -namespace llvmCFGStruct -{ -// this class is tailor to the AMDGPU backend -template<> -struct CFGStructTraits<AMDGPUCFGStructurizer> -{ - typedef int RegiT; - - static int getBreakNzeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32; - default: - assert(0 && "internal error"); - }; - return -1; - } - - static int getBreakZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32; - default: - assert(0 && "internal error"); - }; - return -1; - } - - static int getBranchNzeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32; - ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ); - case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ; - default: - assert(0 && "internal error"); - }; - return -1; - } - - static int getBranchZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32; - ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ); - case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z; - default: - assert(0 && "internal error"); - }; - return -1; - } - - static int getContinueNzeroOpcode(int oldOpcode) - { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; - default: - assert(0 && "internal error"); - }; - return -1; - } - - static int getContinueZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; - default: - assert(0 && "internal error"); - }; - return -1; - } - -// the explicitly represented branch target is the true branch target -#define getExplicitBranch getTrueBranch -#define setExplicitBranch setTrueBranch - - static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { - return instr->getOperand(0).getMBB(); - } - - static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { - instr->getOperand(0).setMBB(blk); - } - - static MachineBasicBlock * - getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { - assert(blk->succ_size() == 2); - MachineBasicBlock *trueBranch = getTrueBranch(instr); - MachineBasicBlock::succ_iterator iter = blk->succ_begin(); - MachineBasicBlock::succ_iterator iterNext = iter; - ++iterNext; - - return (*iter == trueBranch) ? *iterNext : *iter; - } - - static bool isCondBranch(MachineInstr *instr) { - switch (instr->getOpcode()) { - case AMDGPU::JUMP: - return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; - ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); - case AMDGPU::SI_IF_NZ: - case AMDGPU::SI_IF_Z: - break; - default: - return false; - } - return true; - } - - static bool isUncondBranch(MachineInstr *instr) { - switch (instr->getOpcode()) { - case AMDGPU::JUMP: - return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; - case AMDGPU::BRANCH: - return true; - default: - return false; - } - return true; - } - - static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { - //get DebugLoc from the first MachineBasicBlock instruction with debug info - DebugLoc DL; - for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { - MachineInstr *instr = &(*iter); - if (instr->getDebugLoc().isUnknown() == false) { - DL = instr->getDebugLoc(); - } - } - return DL; - } - - static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - MachineInstr *instr = &*iter; - if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { - return instr; - } - return NULL; - } - - // The correct naming for this is getPossibleLoopendBlockBranchInstr. - // - // BB with backward-edge could have move instructions after the branch - // instruction. Such move instruction "belong to" the loop backward-edge. - // - static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { - const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>( - blk->getParent()->getTarget().getInstrInfo()); - - for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), - iterEnd = blk->rend(); iter != iterEnd; ++iter) { - // FIXME: Simplify - MachineInstr *instr = &*iter; - if (instr) { - if (isCondBranch(instr) || isUncondBranch(instr)) { - return instr; - } else if (!TII->isMov(instr->getOpcode())) { - break; - } - } - } - return NULL; - } - - static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - if (iter != blk->rend()) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::RETURN) { - return instr; - } - } - return NULL; - } - - static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - if (iter != blk->rend()) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::CONTINUE) { - return instr; - } - } - return NULL; - } - - static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { - for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { - MachineInstr *instr = &(*iter); - if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) { - return instr; - } - } - return NULL; - } - - static bool isReturnBlock(MachineBasicBlock *blk) { - MachineInstr *instr = getReturnInstr(blk); - bool isReturn = (blk->succ_size() == 0); - if (instr) { - assert(isReturn); - } else if (isReturn) { - if (DEBUGME) { - errs() << "BB" << blk->getNumber() - <<" is return block without RETURN instr\n"; - } - } - - return isReturn; - } - - static MachineBasicBlock::iterator - getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { - assert(instr->getParent() == blk && "instruction doesn't belong to block"); - MachineBasicBlock::iterator iter = blk->begin(); - MachineBasicBlock::iterator iterEnd = blk->end(); - while (&(*iter) != instr && iter != iterEnd) { - ++iter; - } - - assert(iter != iterEnd); - return iter; - }//getInstrPos - - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep) { - return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); - } //insertInstrBefore - - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep, DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); - - MachineBasicBlock::iterator res; - if (blk->begin() != blk->end()) { - blk->insert(blk->begin(), newInstr); - } else { - blk->push_back(newInstr); - } - - SHOWNEWINSTR(newInstr); - - return newInstr; - } //insertInstrBefore - - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep) { - insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); - } //insertInstrEnd - - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep, DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = blk->getParent() - ->CreateMachineInstr(tii->get(newOpcode), DL); - - blk->push_back(newInstr); - //assume the instruction doesn't take any reg operand ... - - SHOWNEWINSTR(newInstr); - } //insertInstrEnd - - static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep) { - MachineInstr *oldInstr = &(*instrPos); - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), - DebugLoc()); - - blk->insert(instrPos, newInstr); - //assume the instruction doesn't take any reg operand ... - - SHOWNEWINSTR(newInstr); - return newInstr; - } //insertInstrBefore - - static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - DebugLoc DL) { - MachineInstr *oldInstr = &(*instrPos); - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), - DL); - - blk->insert(instrPos, newInstr); - MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), - false); - - SHOWNEWINSTR(newInstr); - //erase later oldInstr->eraseFromParent(); - } //insertCondBranchBefore - - static void insertCondBranchBefore(MachineBasicBlock *blk, - MachineBasicBlock::iterator insertPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, - DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); - - //insert before - blk->insert(insertPos, newInstr); - MachineInstrBuilder(newInstr).addReg(regNum, false); - - SHOWNEWINSTR(newInstr); - } //insertCondBranchBefore - - static void insertCondBranchEnd(MachineBasicBlock *blk, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - RegiT regNum) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); - - blk->push_back(newInstr); - MachineInstrBuilder(newInstr).addReg(regNum, false); - - SHOWNEWINSTR(newInstr); - } //insertCondBranchEnd - - - static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, int regVal) { - MachineInstr *oldInstr = &(*instrPos); - const AMDGPUInstrInfo *tii = - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, - regVal); - blk->insert(instrPos, newInstr); - - SHOWNEWINSTR(newInstr); - } //insertAssignInstrBefore - - static void insertAssignInstrBefore(MachineBasicBlock *blk, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, int regVal) { - const AMDGPUInstrInfo *tii = - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); - - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, - regVal); - if (blk->begin() != blk->end()) { - blk->insert(blk->begin(), newInstr); - } else { - blk->push_back(newInstr); - } - - SHOWNEWINSTR(newInstr); - - } //insertInstrBefore - - static void insertCompareInstrBefore(MachineBasicBlock *blk, - MachineBasicBlock::iterator instrPos, - AMDGPUCFGStructurizer *passRep, - RegiT dstReg, RegiT src1Reg, - RegiT src2Reg) { - const AMDGPUInstrInfo *tii = - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); - - MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target - MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value - MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value - - blk->insert(instrPos, newInstr); - SHOWNEWINSTR(newInstr); - - } //insertCompareInstrBefore - - static void cloneSuccessorList(MachineBasicBlock *dstBlk, - MachineBasicBlock *srcBlk) { - for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), - iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of - } - } //cloneSuccessorList - - static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { - MachineFunction *func = srcBlk->getParent(); - MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); - func->push_back(newBlk); //insert to function - //newBlk->setNumber(srcBlk->getNumber()); - for (MachineBasicBlock::iterator iter = srcBlk->begin(), - iterEnd = srcBlk->end(); - iter != iterEnd; ++iter) { - MachineInstr *instr = func->CloneMachineInstr(iter); - newBlk->push_back(instr); - } - return newBlk; - } - - //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because - //the AMDGPU instruction is not recognized as terminator fix this and retire - //this routine - static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, - MachineBasicBlock *oldBlk, - MachineBasicBlock *newBlk) { - MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); - if (branchInstr && isCondBranch(branchInstr) && - getExplicitBranch(branchInstr) == oldBlk) { - setExplicitBranch(branchInstr, newBlk); - } - } - - static void wrapup(MachineBasicBlock *entryBlk) { - assert((!entryBlk->getParent()->getJumpTableInfo() - || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) - && "found a jump table"); - - //collect continue right before endloop - SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr; - MachineBasicBlock::iterator pre = entryBlk->begin(); - MachineBasicBlock::iterator iterEnd = entryBlk->end(); - MachineBasicBlock::iterator iter = pre; - while (iter != iterEnd) { - if (pre->getOpcode() == AMDGPU::CONTINUE - && iter->getOpcode() == AMDGPU::ENDLOOP) { - contInstr.push_back(pre); - } - pre = iter; - ++iter; - } //end while - - //delete continue right before endloop - for (unsigned i = 0; i < contInstr.size(); ++i) { - contInstr[i]->eraseFromParent(); - } - - // TODO to fix up jump table so later phase won't be confused. if - // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but - // there isn't such an interface yet. alternatively, replace all the other - // blocks in the jump table with the entryBlk //} - - } //wrapup - - static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis<MachineDominatorTree>(); - } - - static MachinePostDominatorTree* - getPostDominatorTree(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis<MachinePostDominatorTree>(); - } - - static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis<MachineLoopInfo>(); - } -}; // template class CFGStructTraits -} //end of namespace llvm - -// createAMDGPUCFGPreparationPass- Returns a pass -FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm - ) { - return new AMDGPUCFGPrepare(tm ); -} - -bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { - return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, - *this, - TRI); -} - -// createAMDGPUCFGStructurizerPass- Returns a pass -FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm - ) { - return new AMDGPUCFGPerform(tm ); -} - -bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { - return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func, - *this, - TRI); -} - -//end of file newline goes below - diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp deleted file mode 100644 index 3955828ec31..00000000000 --- a/src/gallium/drivers/radeon/AMDILDevice.cpp +++ /dev/null @@ -1,137 +0,0 @@ -//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDILDevice.h" -#include "AMDGPUSubtarget.h" - -using namespace llvm; -// Default implementation for all of the classes. -AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) -{ - mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); - mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); - setCaps(); - mDeviceFlag = OCL_DEVICE_ALL; -} - -AMDGPUDevice::~AMDGPUDevice() -{ - mHWBits.clear(); - mSWBits.clear(); -} - -size_t AMDGPUDevice::getMaxGDSSize() const -{ - return 0; -} - -uint32_t -AMDGPUDevice::getDeviceFlag() const -{ - return mDeviceFlag; -} - -size_t AMDGPUDevice::getMaxNumCBs() const -{ - if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { - return HW_MAX_NUM_CB; - } - - return 0; -} - -size_t AMDGPUDevice::getMaxCBSize() const -{ - if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { - return MAX_CB_SIZE; - } - - return 0; -} - -size_t AMDGPUDevice::getMaxScratchSize() const -{ - return 65536; -} - -uint32_t AMDGPUDevice::getStackAlignment() const -{ - return 16; -} - -void AMDGPUDevice::setCaps() -{ - mSWBits.set(AMDGPUDeviceInfo::HalfOps); - mSWBits.set(AMDGPUDeviceInfo::ByteOps); - mSWBits.set(AMDGPUDeviceInfo::ShortOps); - mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); - if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) { - mSWBits.set(AMDGPUDeviceInfo::NoInline); - } - if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) { - mSWBits.set(AMDGPUDeviceInfo::MacroDB); - } - if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { - mSWBits.set(AMDGPUDeviceInfo::ConstantMem); - } else { - mHWBits.set(AMDGPUDeviceInfo::ConstantMem); - } - if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { - mSWBits.set(AMDGPUDeviceInfo::PrivateMem); - } else { - mHWBits.set(AMDGPUDeviceInfo::PrivateMem); - } - if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) { - mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); - } - mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps); - mSWBits.set(AMDGPUDeviceInfo::LongOps); -} - -AMDGPUDeviceInfo::ExecutionMode -AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const -{ - if (mHWBits[Caps]) { - assert(!mSWBits[Caps] && "Cannot set both SW and HW caps"); - return AMDGPUDeviceInfo::Hardware; - } - - if (mSWBits[Caps]) { - assert(!mHWBits[Caps] && "Cannot set both SW and HW caps"); - return AMDGPUDeviceInfo::Software; - } - - return AMDGPUDeviceInfo::Unsupported; - -} - -bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const -{ - return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported; -} - -bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const -{ - return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware; -} - -bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const -{ - return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software; -} - -std::string -AMDGPUDevice::getDataLayout() const -{ - return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" - "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" - "-v512:512:512-v1024:1024:1024-v2048:2048:2048" - "-n8:16:32:64"); -} diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h deleted file mode 100644 index 864fa0a3455..00000000000 --- a/src/gallium/drivers/radeon/AMDILDevice.h +++ /dev/null @@ -1,115 +0,0 @@ -//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface for the subtarget data classes. -// -//===----------------------------------------------------------------------===// -// This file will define the interface that each generation needs to -// implement in order to correctly answer queries on the capabilities of the -// specific hardware. -//===----------------------------------------------------------------------===// -#ifndef _AMDILDEVICEIMPL_H_ -#define _AMDILDEVICEIMPL_H_ -#include "AMDIL.h" -#include "llvm/ADT/BitVector.h" - -namespace llvm { - class AMDGPUSubtarget; - class MCStreamer; -//===----------------------------------------------------------------------===// -// Interface for data that is specific to a single device -//===----------------------------------------------------------------------===// -class AMDGPUDevice { -public: - AMDGPUDevice(AMDGPUSubtarget *ST); - virtual ~AMDGPUDevice(); - - // Enum values for the various memory types. - enum { - RAW_UAV_ID = 0, - ARENA_UAV_ID = 1, - LDS_ID = 2, - GDS_ID = 3, - SCRATCH_ID = 4, - CONSTANT_ID = 5, - GLOBAL_ID = 6, - MAX_IDS = 7 - } IO_TYPE_IDS; - - // Returns the max LDS size that the hardware supports. Size is in - // bytes. - virtual size_t getMaxLDSSize() const = 0; - - // Returns the max GDS size that the hardware supports if the GDS is - // supported by the hardware. Size is in bytes. - virtual size_t getMaxGDSSize() const; - - // Returns the max number of hardware constant address spaces that - // are supported by this device. - virtual size_t getMaxNumCBs() const; - - // Returns the max number of bytes a single hardware constant buffer - // can support. Size is in bytes. - virtual size_t getMaxCBSize() const; - - // Returns the max number of bytes allowed by the hardware scratch - // buffer. Size is in bytes. - virtual size_t getMaxScratchSize() const; - - // Get the flag that corresponds to the device. - virtual uint32_t getDeviceFlag() const; - - // Returns the number of work-items that exist in a single hardware - // wavefront. - virtual size_t getWavefrontSize() const = 0; - - // Get the generational name of this specific device. - virtual uint32_t getGeneration() const = 0; - - // Get the stack alignment of this specific device. - virtual uint32_t getStackAlignment() const; - - // Get the resource ID for this specific device. - virtual uint32_t getResourceID(uint32_t DeviceID) const = 0; - - // Get the max number of UAV's for this device. - virtual uint32_t getMaxNumUAVs() const = 0; - - // API utilizing more detailed capabilities of each family of - // cards. If a capability is supported, then either usesHardware or - // usesSoftware returned true. If usesHardware returned true, then - // usesSoftware must return false for the same capability. Hardware - // execution means that the feature is done natively by the hardware - // and is not emulated by the softare. Software execution means - // that the feature could be done in the hardware, but there is - // software that emulates it with possibly using the hardware for - // support since the hardware does not fully comply with OpenCL - // specs. - bool isSupported(AMDGPUDeviceInfo::Caps Mode) const; - bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const; - bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const; - virtual std::string getDataLayout() const; - static const unsigned int MAX_LDS_SIZE_700 = 16384; - static const unsigned int MAX_LDS_SIZE_800 = 32768; - static const unsigned int WavefrontSize = 64; - static const unsigned int HalfWavefrontSize = 32; - static const unsigned int QuarterWavefrontSize = 16; -protected: - virtual void setCaps(); - llvm::BitVector mHWBits; - llvm::BitVector mSWBits; - AMDGPUSubtarget *mSTM; - uint32_t mDeviceFlag; -private: - AMDGPUDeviceInfo::ExecutionMode - getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const; -}; // AMDILDevice - -} // namespace llvm -#endif // _AMDILDEVICEIMPL_H_ diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp deleted file mode 100644 index b2f7cfb3092..00000000000 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Function that creates DeviceInfo from a device name and other information. -// -//==-----------------------------------------------------------------------===// -#include "AMDILDevices.h" -#include "AMDGPUSubtarget.h" - -using namespace llvm; -namespace llvm { -namespace AMDGPUDeviceInfo { - AMDGPUDevice* -getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr, - bool is64bit, bool is64on32bit) -{ - if (deviceName.c_str()[2] == '7') { - switch (deviceName.c_str()[3]) { - case '1': - return new AMDGPU710Device(ptr); - case '7': - return new AMDGPU770Device(ptr); - default: - return new AMDGPU7XXDevice(ptr); - }; - } else if (deviceName == "cypress") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPUCypressDevice(ptr); - } else if (deviceName == "juniper") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPUEvergreenDevice(ptr); - } else if (deviceName == "redwood") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPURedwoodDevice(ptr); - } else if (deviceName == "cedar") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPUCedarDevice(ptr); - } else if (deviceName == "barts" - || deviceName == "turks") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPUNIDevice(ptr); - } else if (deviceName == "cayman") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPUCaymanDevice(ptr); - } else if (deviceName == "caicos") { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPUNIDevice(ptr); - } else if (deviceName == "SI") { - return new AMDGPUSIDevice(ptr); - } else { -#if DEBUG - assert(!is64bit && "This device does not support 64bit pointers!"); - assert(!is64on32bit && "This device does not support 64bit" - " on 32bit pointers!"); -#endif - return new AMDGPU7XXDevice(ptr); - } -} -} // End namespace AMDGPUDeviceInfo -} // End namespace llvm diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h deleted file mode 100644 index 4fa021e3599..00000000000 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h +++ /dev/null @@ -1,90 +0,0 @@ -//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#ifndef _AMDILDEVICEINFO_H_ -#define _AMDILDEVICEINFO_H_ - - -#include <string> - -namespace llvm -{ - class AMDGPUDevice; - class AMDGPUSubtarget; - namespace AMDGPUDeviceInfo - { - // Each Capabilities can be executed using a hardware instruction, - // emulated with a sequence of software instructions, or not - // supported at all. - enum ExecutionMode { - Unsupported = 0, // Unsupported feature on the card(Default value) - Software, // This is the execution mode that is set if the - // feature is emulated in software - Hardware // This execution mode is set if the feature exists - // natively in hardware - }; - - // Any changes to this needs to have a corresponding update to the - // twiki page GPUMetadataABI - enum Caps { - HalfOps = 0x1, // Half float is supported or not. - DoubleOps = 0x2, // Double is supported or not. - ByteOps = 0x3, // Byte(char) is support or not. - ShortOps = 0x4, // Short is supported or not. - LongOps = 0x5, // Long is supported or not. - Images = 0x6, // Images are supported or not. - ByteStores = 0x7, // ByteStores available(!HD4XXX). - ConstantMem = 0x8, // Constant/CB memory. - LocalMem = 0x9, // Local/LDS memory. - PrivateMem = 0xA, // Scratch/Private/Stack memory. - RegionMem = 0xB, // OCL GDS Memory Extension. - FMA = 0xC, // Use HW FMA or SW FMA. - ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023. - MultiUAV = 0xE, // Use for UAV per Pointer 0-7. - Reserved0 = 0xF, // ReservedFlag - NoAlias = 0x10, // Cached loads. - Signed24BitOps = 0x11, // Peephole Optimization. - // Debug mode implies that no hardware features or optimizations - // are performned and that all memory access go through a single - // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX). - Debug = 0x12, // Debug mode is enabled. - CachedMem = 0x13, // Cached mem is available or not. - BarrierDetect = 0x14, // Detect duplicate barriers. - Reserved1 = 0x15, // Reserved flag - ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available. - ArenaVectors = 0x17, // Flag to specify if vector loads from arena work. - TmrReg = 0x18, // Flag to specify if Tmr register is supported. - NoInline = 0x19, // Flag to specify that no inlining should occur. - MacroDB = 0x1A, // Flag to specify that backend handles macrodb. - HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod. - ArenaUAV = 0x1C, // Flag to specify that arena uav is supported. - PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's. - // If more capabilities are required, then - // this number needs to be increased. - // All capabilities must come before this - // number. - MaxNumberCapabilities = 0x20 - }; - // These have to be in order with the older generations - // having the lower number enumerations. - enum Generation { - HD4XXX = 0, // 7XX based devices. - HD5XXX, // Evergreen based devices. - HD6XXX, // NI/Evergreen+ based devices. - HD7XXX, - HDTEST, // Experimental feature testing device. - HDNUMGEN - }; - - - AMDGPUDevice* - getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr, - bool is64bit = false, bool is64on32bit = false); - } // namespace AMDILDeviceInfo -} // namespace llvm -#endif // _AMDILDEVICEINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h deleted file mode 100644 index cfcc3304b4b..00000000000 --- a/src/gallium/drivers/radeon/AMDILDevices.h +++ /dev/null @@ -1,19 +0,0 @@ -//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#ifndef __AMDIL_DEVICES_H_ -#define __AMDIL_DEVICES_H_ -// Include all of the device specific header files -// This file is for Internal use only! -#include "AMDIL7XXDevice.h" -#include "AMDILDevice.h" -#include "AMDILEvergreenDevice.h" -#include "AMDILNIDevice.h" -#include "AMDILSIDevice.h" - -#endif // _AMDIL_DEVICES_H_ diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp deleted file mode 100644 index 3532a28fe0f..00000000000 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp +++ /dev/null @@ -1,169 +0,0 @@ -//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDILEvergreenDevice.h" - -using namespace llvm; - -AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST) -: AMDGPUDevice(ST) { - setCaps(); - std::string name = ST->getDeviceName(); - if (name == "cedar") { - mDeviceFlag = OCL_DEVICE_CEDAR; - } else if (name == "redwood") { - mDeviceFlag = OCL_DEVICE_REDWOOD; - } else if (name == "cypress") { - mDeviceFlag = OCL_DEVICE_CYPRESS; - } else { - mDeviceFlag = OCL_DEVICE_JUNIPER; - } -} - -AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() { -} - -size_t AMDGPUEvergreenDevice::getMaxLDSSize() const { - if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { - return MAX_LDS_SIZE_800; - } else { - return 0; - } -} -size_t AMDGPUEvergreenDevice::getMaxGDSSize() const { - if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { - return MAX_LDS_SIZE_800; - } else { - return 0; - } -} -uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const { - return 12; -} - -uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const { - switch(id) { - default: - assert(0 && "ID type passed in is unknown!"); - break; - case CONSTANT_ID: - case RAW_UAV_ID: - return GLOBAL_RETURN_RAW_UAV_ID; - case GLOBAL_ID: - case ARENA_UAV_ID: - return DEFAULT_ARENA_UAV_ID; - case LDS_ID: - if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { - return DEFAULT_LDS_ID; - } else { - return DEFAULT_ARENA_UAV_ID; - } - case GDS_ID: - if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { - return DEFAULT_GDS_ID; - } else { - return DEFAULT_ARENA_UAV_ID; - } - case SCRATCH_ID: - if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { - return DEFAULT_SCRATCH_ID; - } else { - return DEFAULT_ARENA_UAV_ID; - } - }; - return 0; -} - -size_t AMDGPUEvergreenDevice::getWavefrontSize() const { - return AMDGPUDevice::WavefrontSize; -} - -uint32_t AMDGPUEvergreenDevice::getGeneration() const { - return AMDGPUDeviceInfo::HD5XXX; -} - -void AMDGPUEvergreenDevice::setCaps() { - mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); - mHWBits.set(AMDGPUDeviceInfo::ArenaUAV); - mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); - mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod); - mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps); - if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) { - mHWBits.set(AMDGPUDeviceInfo::ByteStores); - } - if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { - mSWBits.set(AMDGPUDeviceInfo::LocalMem); - mSWBits.set(AMDGPUDeviceInfo::RegionMem); - } else { - mHWBits.set(AMDGPUDeviceInfo::LocalMem); - mHWBits.set(AMDGPUDeviceInfo::RegionMem); - } - mHWBits.set(AMDGPUDeviceInfo::Images); - if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) { - mHWBits.set(AMDGPUDeviceInfo::NoAlias); - } - mHWBits.set(AMDGPUDeviceInfo::CachedMem); - if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) { - mHWBits.set(AMDGPUDeviceInfo::MultiUAV); - } - mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps); - mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps); - mHWBits.set(AMDGPUDeviceInfo::ArenaVectors); - mHWBits.set(AMDGPUDeviceInfo::LongOps); - mSWBits.reset(AMDGPUDeviceInfo::LongOps); - mHWBits.set(AMDGPUDeviceInfo::TmrReg); -} - -AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST) - : AMDGPUEvergreenDevice(ST) { - setCaps(); -} - -AMDGPUCypressDevice::~AMDGPUCypressDevice() { -} - -void AMDGPUCypressDevice::setCaps() { - if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { - mHWBits.set(AMDGPUDeviceInfo::DoubleOps); - mHWBits.set(AMDGPUDeviceInfo::FMA); - } -} - - -AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST) - : AMDGPUEvergreenDevice(ST) { - setCaps(); -} - -AMDGPUCedarDevice::~AMDGPUCedarDevice() { -} - -void AMDGPUCedarDevice::setCaps() { - mSWBits.set(AMDGPUDeviceInfo::FMA); -} - -size_t AMDGPUCedarDevice::getWavefrontSize() const { - return AMDGPUDevice::QuarterWavefrontSize; -} - -AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST) - : AMDGPUEvergreenDevice(ST) { - setCaps(); -} - -AMDGPURedwoodDevice::~AMDGPURedwoodDevice() -{ -} - -void AMDGPURedwoodDevice::setCaps() { - mSWBits.set(AMDGPUDeviceInfo::FMA); -} - -size_t AMDGPURedwoodDevice::getWavefrontSize() const { - return AMDGPUDevice::HalfWavefrontSize; -} diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h deleted file mode 100644 index cde1472f934..00000000000 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h +++ /dev/null @@ -1,87 +0,0 @@ -//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface for the subtarget data classes. -// -//===----------------------------------------------------------------------===// -// This file will define the interface that each generation needs to -// implement in order to correctly answer queries on the capabilities of the -// specific hardware. -//===----------------------------------------------------------------------===// -#ifndef _AMDILEVERGREENDEVICE_H_ -#define _AMDILEVERGREENDEVICE_H_ -#include "AMDILDevice.h" -#include "AMDGPUSubtarget.h" - -namespace llvm { - class AMDGPUSubtarget; -//===----------------------------------------------------------------------===// -// Evergreen generation of devices and their respective sub classes -//===----------------------------------------------------------------------===// - - -// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen -// series of cards. This class contains information required to differentiate -// the Evergreen device from the generic AMDGPUDevice. This device represents -// that capabilities of the 'Juniper' cards, also known as the HD57XX. -class AMDGPUEvergreenDevice : public AMDGPUDevice { -public: - AMDGPUEvergreenDevice(AMDGPUSubtarget *ST); - virtual ~AMDGPUEvergreenDevice(); - virtual size_t getMaxLDSSize() const; - virtual size_t getMaxGDSSize() const; - virtual size_t getWavefrontSize() const; - virtual uint32_t getGeneration() const; - virtual uint32_t getMaxNumUAVs() const; - virtual uint32_t getResourceID(uint32_t) const; -protected: - virtual void setCaps(); -}; // AMDGPUEvergreenDevice - -// The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has -// support for double precision operations. This device is used to represent -// both the Cypress and Hemlock cards, which are commercially known as HD58XX -// and HD59XX cards. -class AMDGPUCypressDevice : public AMDGPUEvergreenDevice { -public: - AMDGPUCypressDevice(AMDGPUSubtarget *ST); - virtual ~AMDGPUCypressDevice(); -private: - virtual void setCaps(); -}; // AMDGPUCypressDevice - - -// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based -// devices. This class differs from the base AMDGPUEvergreenDevice in that the -// device is a ~quarter of the 'Juniper'. These are commercially known as the -// HD54XX and HD53XX series of cards. -class AMDGPUCedarDevice : public AMDGPUEvergreenDevice { -public: - AMDGPUCedarDevice(AMDGPUSubtarget *ST); - virtual ~AMDGPUCedarDevice(); - virtual size_t getWavefrontSize() const; -private: - virtual void setCaps(); -}; // AMDGPUCedarDevice - -// The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based -// devices. This class differs from the base class, in that these devices are -// considered about half of a 'Juniper' device. These are commercially known as -// the HD55XX and HD56XX series of cards. -class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice { -public: - AMDGPURedwoodDevice(AMDGPUSubtarget *ST); - virtual ~AMDGPURedwoodDevice(); - virtual size_t getWavefrontSize() const; -private: - virtual void setCaps(); -}; // AMDGPURedwoodDevice - -} // namespace llvm -#endif // _AMDGPUEVERGREENDEVICE_H_ diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp deleted file mode 100644 index f2a0fe5a39c..00000000000 --- a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp +++ /dev/null @@ -1,53 +0,0 @@ -//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface to describe a layout of a stack frame on a AMDIL target machine -// -//===----------------------------------------------------------------------===// -#include "AMDILFrameLowering.h" -#include "llvm/CodeGen/MachineFrameInfo.h" - -using namespace llvm; -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, - int LAO, unsigned TransAl) - : TargetFrameLowering(D, StackAl, LAO, TransAl) -{ -} - -AMDGPUFrameLowering::~AMDGPUFrameLowering() -{ -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI); -} - -const TargetFrameLowering::SpillSlot * -AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const -{ - NumEntries = 0; - return 0; -} -void -AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const -{ -} -void -AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const -{ -} -bool -AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const -{ - return false; -} diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.h b/src/gallium/drivers/radeon/AMDILFrameLowering.h deleted file mode 100644 index 934ee46821d..00000000000 --- a/src/gallium/drivers/radeon/AMDILFrameLowering.h +++ /dev/null @@ -1,46 +0,0 @@ -//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Interface to describe a layout of a stack frame on a AMDIL target machine -// -//===----------------------------------------------------------------------===// -#ifndef _AMDILFRAME_LOWERING_H_ -#define _AMDILFRAME_LOWERING_H_ - -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetFrameLowering.h" - -/// Information about the stack frame layout on the AMDGPU targets. It holds -/// the direction of the stack growth, the known stack alignment on entry to -/// each function, and the offset to the locals area. -/// See TargetFrameInfo for more comments. - -namespace llvm { - class AMDGPUFrameLowering : public TargetFrameLowering { - public: - AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned - TransAl = 1); - virtual ~AMDGPUFrameLowering(); - virtual int getFrameIndexOffset(const MachineFunction &MF, - int FI) const; - virtual const SpillSlot * - getCalleeSavedSpillSlots(unsigned &NumEntries) const; - virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - virtual bool hasFP(const MachineFunction &MF) const; - }; // class AMDGPUFrameLowering -} // namespace llvm -#endif // _AMDILFRAME_LOWERING_H_ diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp deleted file mode 100644 index 807113134d2..00000000000 --- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp +++ /dev/null @@ -1,395 +0,0 @@ -//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the AMDIL target. -// -//===----------------------------------------------------------------------===// -#include "AMDGPUInstrInfo.h" -#include "AMDGPUISelLowering.h" // For AMDGPUISD -#include "AMDGPURegisterInfo.h" -#include "AMDILDevices.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/ValueMap.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Support/Compiler.h" -#include <list> -#include <queue> - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Instruction Selector Implementation -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine instructions -// //for SelectionDAG operations. -// -namespace { -class AMDGPUDAGToDAGISel : public SelectionDAGISel { - // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can - // make the right decision when generating code for different targets. - const AMDGPUSubtarget &Subtarget; -public: - AMDGPUDAGToDAGISel(TargetMachine &TM); - virtual ~AMDGPUDAGToDAGISel(); - - SDNode *Select(SDNode *N); - virtual const char *getPassName() const; - -private: - inline SDValue getSmallIPtrImm(unsigned Imm); - - // Complex pattern selectors - bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); - bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); - bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); - - static bool checkType(const Value *ptr, unsigned int addrspace); - static const Value *getBasePointerValue(const Value *V); - - static bool isGlobalStore(const StoreSDNode *N); - static bool isPrivateStore(const StoreSDNode *N); - static bool isLocalStore(const StoreSDNode *N); - static bool isRegionStore(const StoreSDNode *N); - - static bool isCPLoad(const LoadSDNode *N); - static bool isConstantLoad(const LoadSDNode *N, int cbID); - static bool isGlobalLoad(const LoadSDNode *N); - static bool isPrivateLoad(const LoadSDNode *N); - static bool isLocalLoad(const LoadSDNode *N); - static bool isRegionLoad(const LoadSDNode *N); - - bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); - bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); - bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); - - // Include the pieces autogenerated from the target description. -#include "AMDGPUGenDAGISel.inc" -}; -} // end anonymous namespace - -// createAMDGPUISelDag - This pass converts a legalized DAG into a AMDGPU-specific -// DAG, ready for instruction scheduling. -// -FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM - ) { - return new AMDGPUDAGToDAGISel(TM); -} - -AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM - ) - : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) -{ -} - -AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { -} - -SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); -} - -bool AMDGPUDAGToDAGISel::SelectADDRParam( - SDValue Addr, SDValue& R1, SDValue& R2) { - - if (Addr.getOpcode() == ISD::FrameIndex) { - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - R2 = CurDAG->getTargetConstant(0, MVT::i32); - } else { - R1 = Addr; - R2 = CurDAG->getTargetConstant(0, MVT::i32); - } - } else if (Addr.getOpcode() == ISD::ADD) { - R1 = Addr.getOperand(0); - R2 = Addr.getOperand(1); - } else { - R1 = Addr; - R2 = CurDAG->getTargetConstant(0, MVT::i32); - } - return true; -} - -bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) { - return false; - } - return SelectADDRParam(Addr, R1, R2); -} - - -bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) { - return false; - } - - if (Addr.getOpcode() == ISD::FrameIndex) { - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); - R2 = CurDAG->getTargetConstant(0, MVT::i64); - } else { - R1 = Addr; - R2 = CurDAG->getTargetConstant(0, MVT::i64); - } - } else if (Addr.getOpcode() == ISD::ADD) { - R1 = Addr.getOperand(0); - R2 = Addr.getOperand(1); - } else { - R1 = Addr; - R2 = CurDAG->getTargetConstant(0, MVT::i64); - } - return true; -} - -SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { - unsigned int Opc = N->getOpcode(); - if (N->isMachineOpcode()) { - return NULL; // Already selected. - } - switch (Opc) { - default: break; - case ISD::FrameIndex: - { - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { - unsigned int FI = FIN->getIndex(); - EVT OpVT = N->getValueType(0); - unsigned int NewOpc = AMDGPU::COPY; - SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); - } - } - break; - } - return SelectCode(N); -} - -bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { - if (!ptr) { - return false; - } - Type *ptrType = ptr->getType(); - return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; -} - -const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) -{ - if (!V) { - return NULL; - } - const Value *ret = NULL; - ValueMap<const Value *, bool> ValueBitMap; - std::queue<const Value *, std::list<const Value *> > ValueQueue; - ValueQueue.push(V); - while (!ValueQueue.empty()) { - V = ValueQueue.front(); - if (ValueBitMap.find(V) == ValueBitMap.end()) { - ValueBitMap[V] = true; - if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { - ret = V; - break; - } else if (dyn_cast<GlobalVariable>(V)) { - ret = V; - break; - } else if (dyn_cast<Constant>(V)) { - const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); - if (CE) { - ValueQueue.push(CE->getOperand(0)); - } - } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - ret = AI; - break; - } else if (const Instruction *I = dyn_cast<Instruction>(V)) { - uint32_t numOps = I->getNumOperands(); - for (uint32_t x = 0; x < numOps; ++x) { - ValueQueue.push(I->getOperand(x)); - } - } else { - // assert(0 && "Found a Value that we didn't know how to handle!"); - } - } - ValueQueue.pop(); - } - return ret; -} - -bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); -} - -bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { - return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)); -} - -bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); -} - -bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); -} - -bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { - if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) { - return true; - } - MachineMemOperand *MMO = N->getMemOperand(); - const Value *V = MMO->getValue(); - const Value *BV = getBasePointerValue(V); - if (MMO - && MMO->getValue() - && ((V && dyn_cast<GlobalValue>(V)) - || (BV && dyn_cast<GlobalValue>( - getBasePointerValue(MMO->getValue()))))) { - return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS); - } else { - return false; - } -} - -bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); -} - -bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); -} - -bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); -} - -bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) { - MachineMemOperand *MMO = N->getMemOperand(); - if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { - if (MMO) { - const Value *V = MMO->getValue(); - const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V); - if (PSV && PSV == PseudoSourceValue::getConstantPool()) { - return true; - } - } - } - return false; -} - -bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { - if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { - // Check to make sure we are not a constant pool load or a constant load - // that is marked as a private load - if (isCPLoad(N) || isConstantLoad(N, -1)) { - return false; - } - } - if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) - { - return true; - } - return false; -} - -const char *AMDGPUDAGToDAGISel::getPassName() const { - return "AMDGPU DAG->DAG Pattern Instruction Selection"; -} - -#ifdef DEBUGTMP -#undef INT64_C -#endif -#undef DEBUGTMP - -///==== AMDGPU Functions ====/// - -bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, - SDValue& Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) { - return false; - } - - - if (Addr.getOpcode() == ISD::ADD) { - bool Match = false; - - // Find the base ptr and the offset - for (unsigned i = 0; i < Addr.getNumOperands(); i++) { - SDValue Arg = Addr.getOperand(i); - ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg); - // This arg isn't a constant so it must be the base PTR. - if (!OffsetNode) { - Base = Addr.getOperand(i); - continue; - } - // Check if the constant argument fits in 8-bits. The offset is in bytes - // so we need to convert it to dwords. - if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) { - Match = true; - Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, - MVT::i32); - } - } - return Match; - } - - // Default case, no offset - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - -bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, - SDValue &Offset) -{ - ConstantSDNode * IMMOffset; - - if (Addr.getOpcode() == ISD::ADD - && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && isInt<16>(IMMOffset->getZExtValue())) { - - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); - return true; - // If the pointer address is constant, we can move it to the offset field. - } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) - && isInt<16>(IMMOffset->getZExtValue())) { - Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), - CurDAG->getEntryNode().getDebugLoc(), - AMDGPU::ZERO, MVT::i32); - Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); - return true; - } - - // Default case, no offset - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - -bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, - SDValue& Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress || - Addr.getOpcode() != ISD::ADD) { - return false; - } - - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - - return true; -} diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp deleted file mode 100644 index 993025c3d47..00000000000 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ /dev/null @@ -1,677 +0,0 @@ -//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file contains TargetLowering functions borrowed from AMDLI. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUISelLowering.h" -#include "AMDGPURegisterInfo.h" -#include "AMDILDevices.h" -#include "AMDILIntrinsicInfo.h" -#include "AMDGPUSubtarget.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/CallingConv.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" - -using namespace llvm; -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// -#include "AMDGPUGenCallingConv.inc" - -//===----------------------------------------------------------------------===// -// TargetLowering Implementation Help Functions End -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TargetLowering Class Implementation Begins -//===----------------------------------------------------------------------===// -void AMDGPUTargetLowering::InitAMDILLowering() -{ - int types[] = - { - (int)MVT::i8, - (int)MVT::i16, - (int)MVT::i32, - (int)MVT::f32, - (int)MVT::f64, - (int)MVT::i64, - (int)MVT::v2i8, - (int)MVT::v4i8, - (int)MVT::v2i16, - (int)MVT::v4i16, - (int)MVT::v4f32, - (int)MVT::v4i32, - (int)MVT::v2f32, - (int)MVT::v2i32, - (int)MVT::v2f64, - (int)MVT::v2i64 - }; - - int IntTypes[] = - { - (int)MVT::i8, - (int)MVT::i16, - (int)MVT::i32, - (int)MVT::i64 - }; - - int FloatTypes[] = - { - (int)MVT::f32, - (int)MVT::f64 - }; - - int VectorTypes[] = - { - (int)MVT::v2i8, - (int)MVT::v4i8, - (int)MVT::v2i16, - (int)MVT::v4i16, - (int)MVT::v4f32, - (int)MVT::v4i32, - (int)MVT::v2f32, - (int)MVT::v2i32, - (int)MVT::v2f64, - (int)MVT::v2i64 - }; - size_t numTypes = sizeof(types) / sizeof(*types); - size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); - size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); - size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); - - const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); - // These are the current register classes that are - // supported - - for (unsigned int x = 0; x < numTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; - - //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types - // We cannot sextinreg, expand to shifts - setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); - setOperationAction(ISD::SUBE, VT, Expand); - setOperationAction(ISD::SUBC, VT, Expand); - setOperationAction(ISD::ADDE, VT, Expand); - setOperationAction(ISD::ADDC, VT, Expand); - setOperationAction(ISD::BRCOND, VT, Custom); - setOperationAction(ISD::BR_JT, VT, Expand); - setOperationAction(ISD::BRIND, VT, Expand); - // TODO: Implement custom UREM/SREM routines - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - if (VT != MVT::i64 && VT != MVT::v2i64) { - setOperationAction(ISD::SDIV, VT, Custom); - } - } - for (unsigned int x = 0; x < numFloatTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; - - // IL does not have these operations for floating point types - setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); - setOperationAction(ISD::SETOLT, VT, Expand); - setOperationAction(ISD::SETOGE, VT, Expand); - setOperationAction(ISD::SETOGT, VT, Expand); - setOperationAction(ISD::SETOLE, VT, Expand); - setOperationAction(ISD::SETULT, VT, Expand); - setOperationAction(ISD::SETUGE, VT, Expand); - setOperationAction(ISD::SETUGT, VT, Expand); - setOperationAction(ISD::SETULE, VT, Expand); - } - - for (unsigned int x = 0; x < numIntTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; - - // GPU also does not have divrem function for signed or unsigned - setOperationAction(ISD::SDIVREM, VT, Expand); - - // GPU does not have [S|U]MUL_LOHI functions as a single instruction - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - - // GPU doesn't have a rotl, rotr, or byteswap instruction - setOperationAction(ISD::ROTR, VT, Expand); - setOperationAction(ISD::BSWAP, VT, Expand); - - // GPU doesn't have any counting operators - setOperationAction(ISD::CTPOP, VT, Expand); - setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTLZ, VT, Expand); - } - - for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) - { - MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; - - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); - setOperationAction(ISD::SDIVREM, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - // setOperationAction(ISD::VSETCC, VT, Expand); - setOperationAction(ISD::SELECT_CC, VT, Expand); - - } - if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) { - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::v2i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::v2i64, Expand); - setOperationAction(ISD::ADD, MVT::v2i64, Expand); - setOperationAction(ISD::SREM, MVT::v2i64, Expand); - setOperationAction(ISD::Constant , MVT::i64 , Legal); - setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); - } - if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) { - // we support loading/storing v2f64 but not operations on the type - setOperationAction(ISD::FADD, MVT::v2f64, Expand); - setOperationAction(ISD::FSUB, MVT::v2f64, Expand); - setOperationAction(ISD::FMUL, MVT::v2f64, Expand); - setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); - // We want to expand vector conversions into their scalar - // counterparts. - setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::FABS, MVT::f64, Expand); - setOperationAction(ISD::FABS, MVT::v2f64, Expand); - } - // TODO: Fix the UDIV24 algorithm so it works for these - // types correctly. This needs vector comparisons - // for this to work correctly. - setOperationAction(ISD::UDIV, MVT::v2i8, Expand); - setOperationAction(ISD::UDIV, MVT::v4i8, Expand); - setOperationAction(ISD::UDIV, MVT::v2i16, Expand); - setOperationAction(ISD::UDIV, MVT::v4i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); - setOperationAction(ISD::SUBC, MVT::Other, Expand); - setOperationAction(ISD::ADDE, MVT::Other, Expand); - setOperationAction(ISD::ADDC, MVT::Other, Expand); - setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); - - - // Use the default implementation. - setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); - setOperationAction(ISD::Constant , MVT::i32 , Legal); - - setSchedulingPreference(Sched::RegPressure); - setPow2DivIsCheap(false); - setPrefLoopAlignment(16); - setSelectIsExpensive(true); - setJumpIsExpensive(true); - - maxStoresPerMemcpy = 4096; - maxStoresPerMemmove = 4096; - maxStoresPerMemset = 4096; - -#undef numTypes -#undef numIntTypes -#undef numVectorTypes -#undef numFloatTypes -} - -bool -AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, unsigned Intrinsic) const -{ - return false; -} -// The backend supports 32 and 64 bit floating point immediates -bool -AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const -{ - if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 - || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { - return true; - } else { - return false; - } -} - -bool -AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const -{ - if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 - || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { - return false; - } else { - return true; - } -} - - -// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to -// be zero. Op is expected to be a target specific node. Used by DAG -// combiner. - -void -AMDGPUTargetLowering::computeMaskedBitsForTargetNode( - const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const -{ - APInt KnownZero2; - APInt KnownOne2; - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything - switch (Op.getOpcode()) { - default: break; - case ISD::SELECT_CC: - DAG.ComputeMaskedBits( - Op.getOperand(1), - KnownZero, - KnownOne, - Depth + 1 - ); - DAG.ComputeMaskedBits( - Op.getOperand(0), - KnownZero2, - KnownOne2 - ); - assert((KnownZero & KnownOne) == 0 - && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 - && "Bits known to be one AND zero?"); - // Only known if known in both the LHS and RHS - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; - break; - }; -} - -//===----------------------------------------------------------------------===// -// Other Lowering Hooks -//===----------------------------------------------------------------------===// - -SDValue -AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const -{ - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::i64) { - DST = LowerSDIV64(Op, DAG); - } else if (OVT.getScalarType() == MVT::i32) { - DST = LowerSDIV32(Op, DAG); - } else if (OVT.getScalarType() == MVT::i16 - || OVT.getScalarType() == MVT::i8) { - DST = LowerSDIV24(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const -{ - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::i64) { - DST = LowerSREM64(Op, DAG); - } else if (OVT.getScalarType() == MVT::i32) { - DST = LowerSREM32(Op, DAG); - } else if (OVT.getScalarType() == MVT::i16) { - DST = LowerSREM16(Op, DAG); - } else if (OVT.getScalarType() == MVT::i8) { - DST = LowerSREM8(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Data = Op.getOperand(0); - VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); - DebugLoc DL = Op.getDebugLoc(); - EVT DVT = Data.getValueType(); - EVT BVT = BaseType->getVT(); - unsigned baseBits = BVT.getScalarType().getSizeInBits(); - unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; - unsigned shiftBits = srcBits - baseBits; - if (srcBits < 32) { - // If the op is less than 32 bits, then it needs to extend to 32bits - // so it can properly keep the upper bits valid. - EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); - Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); - shiftBits = 32 - baseBits; - DVT = IVT; - } - SDValue Shift = DAG.getConstant(shiftBits, DVT); - // Shift left by 'Shift' bits. - Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); - // Signed shift Right by 'Shift' bits. - Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); - if (srcBits < 32) { - // Once the sign extension is done, the op needs to be converted to - // its original type. - Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); - } - return Data; -} -EVT -AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const -{ - int iSize = (size * numEle); - int vEle = (iSize >> ((size == 64) ? 6 : 5)); - if (!vEle) { - vEle = 1; - } - if (size == 64) { - if (vEle == 1) { - return EVT(MVT::i64); - } else { - return EVT(MVT::getVectorVT(MVT::i64, vEle)); - } - } else { - if (vEle == 1) { - return EVT(MVT::i32); - } else { - return EVT(MVT::getVectorVT(MVT::i32, vEle)); - } - } -} - -SDValue -AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Chain = Op.getOperand(0); - SDValue Cond = Op.getOperand(1); - SDValue Jump = Op.getOperand(2); - SDValue Result; - Result = DAG.getNode( - AMDGPUISD::BRANCH_COND, - Op.getDebugLoc(), - Op.getValueType(), - Chain, Jump, Cond); - return Result; -} - -SDValue -AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - MVT INTTY; - MVT FLTTY; - if (!OVT.isVector()) { - INTTY = MVT::i32; - FLTTY = MVT::f32; - } else if (OVT.getVectorNumElements() == 2) { - INTTY = MVT::v2i32; - FLTTY = MVT::v2f32; - } else if (OVT.getVectorNumElements() == 4) { - INTTY = MVT::v4i32; - FLTTY = MVT::v4f32; - } - unsigned bitsize = OVT.getScalarType().getSizeInBits(); - // char|short jq = ia ^ ib; - SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); - - // jq = jq >> (bitsize - 2) - jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); - - // jq = jq | 0x1 - jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); - - // jq = (int)jq - jq = DAG.getSExtOrTrunc(jq, DL, INTTY); - - // int ia = (int)LHS; - SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); - - // int ib, (int)RHS; - SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); - - // float fa = (float)ia; - SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); - - // float fb = (float)ib; - SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); - - // float fq = native_divide(fa, fb); - SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); - - // fq = trunc(fq); - fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); - - // float fqneg = -fq; - SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); - - // float fr = mad(fqneg, fb, fa); - SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); - - // int iq = (int)fq; - SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); - - // fr = fabs(fr); - fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); - - // fb = fabs(fb); - fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); - - // int cv = fr >= fb; - SDValue cv; - if (INTTY == MVT::i32) { - cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); - } else { - cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); - } - // jq = (cv ? jq : 0); - jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, - DAG.getConstant(0, OVT)); - // dst = iq + jq; - iq = DAG.getSExtOrTrunc(iq, DL, OVT); - iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); - return iq; -} - -SDValue -AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerSDIV32 function generates equivalent to the following IL. - // mov r0, LHS - // mov r1, RHS - // ilt r10, r0, 0 - // ilt r11, r1, 0 - // iadd r0, r0, r10 - // iadd r1, r1, r11 - // ixor r0, r0, r10 - // ixor r1, r1, r11 - // udiv r0, r0, r1 - // ixor r10, r10, r11 - // iadd r0, r0, r10 - // ixor DST, r0, r10 - - // mov r0, LHS - SDValue r0 = LHS; - - // mov r1, RHS - SDValue r1 = RHS; - - // ilt r10, r0, 0 - SDValue r10 = DAG.getSelectCC(DL, - r0, DAG.getConstant(0, OVT), - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - ISD::SETLT); - - // ilt r11, r1, 0 - SDValue r11 = DAG.getSelectCC(DL, - r1, DAG.getConstant(0, OVT), - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - ISD::SETLT); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // iadd r1, r1, r11 - r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); - - // ixor r0, r0, r10 - r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - - // ixor r1, r1, r11 - r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); - - // udiv r0, r0, r1 - r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); - - // ixor r10, r10, r11 - r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // ixor DST, r0, r10 - SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const -{ - return SDValue(Op.getNode(), 0); -} - -SDValue -AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i8) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4i8) { - INTTY = MVT::v4i32; - } - SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); - SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); - LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); - LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); - return LHS; -} - -SDValue -AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i16) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4i16) { - INTTY = MVT::v4i32; - } - SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); - SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); - LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); - LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); - return LHS; -} - -SDValue -AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerSREM32 function generates equivalent to the following IL. - // mov r0, LHS - // mov r1, RHS - // ilt r10, r0, 0 - // ilt r11, r1, 0 - // iadd r0, r0, r10 - // iadd r1, r1, r11 - // ixor r0, r0, r10 - // ixor r1, r1, r11 - // udiv r20, r0, r1 - // umul r20, r20, r1 - // sub r0, r0, r20 - // iadd r0, r0, r10 - // ixor DST, r0, r10 - - // mov r0, LHS - SDValue r0 = LHS; - - // mov r1, RHS - SDValue r1 = RHS; - - // ilt r10, r0, 0 - SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); - - // ilt r11, r1, 0 - SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // iadd r1, r1, r11 - r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); - - // ixor r0, r0, r10 - r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - - // ixor r1, r1, r11 - r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); - - // udiv r20, r0, r1 - SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); - - // umul r20, r20, r1 - r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); - - // sub r0, r0, r20 - r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // ixor DST, r0, r10 - SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const -{ - return SDValue(Op.getNode(), 0); -} diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td deleted file mode 100644 index 050a5bd874f..00000000000 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.td +++ /dev/null @@ -1,270 +0,0 @@ -//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file describes the AMDIL instructions in TableGen format. -// -//===----------------------------------------------------------------------===// -// AMDIL Instruction Predicate Definitions -// Predicate that is set to true if the hardware supports double precision -// divide -def HasHWDDiv : Predicate<"Subtarget.device()" - "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && " - "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">; - -// Predicate that is set to true if the hardware supports double, but not double -// precision divide in hardware -def HasSWDDiv : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" - "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">; - -// Predicate that is set to true if the hardware support 24bit signed -// math ops. Otherwise a software expansion to 32bit math ops is used instead. -def HasHWSign24Bit : Predicate<"Subtarget.device()" - "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">; - -// Predicate that is set to true if 64bit operations are supported or not -def HasHW64Bit : Predicate<"Subtarget.device()" - "->usesHardware(AMDGPUDeviceInfo::LongOps)">; -def HasSW64Bit : Predicate<"Subtarget.device()" - "->usesSoftware(AMDGPUDeviceInfo::LongOps)">; - -// Predicate that is set to true if the timer register is supported -def HasTmrRegister : Predicate<"Subtarget.device()" - "->isSupported(AMDGPUDeviceInfo::TmrReg)">; -// Predicate that is true if we are at least evergreen series -def HasDeviceIDInst : Predicate<"Subtarget.device()" - "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">; - -// Predicate that is true if we have region address space. -def hasRegionAS : Predicate<"Subtarget.device()" - "->usesHardware(AMDGPUDeviceInfo::RegionMem)">; - -// Predicate that is false if we don't have region address space. -def noRegionAS : Predicate<"!Subtarget.device()" - "->isSupported(AMDGPUDeviceInfo::RegionMem)">; - - -// Predicate that is set to true if 64bit Mul is supported in the IL or not -def HasHW64Mul : Predicate<"Subtarget.calVersion()" - ">= CAL_VERSION_SC_139" - "&& Subtarget.device()" - "->getGeneration() >=" - "AMDGPUDeviceInfo::HD5XXX">; -def HasSW64Mul : Predicate<"Subtarget.calVersion()" - "< CAL_VERSION_SC_139">; -// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not -def HasHW64DivMod : Predicate<"Subtarget.device()" - "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">; -def HasSW64DivMod : Predicate<"Subtarget.device()" - "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">; - -// Predicate that is set to true if 64bit pointer are used. -def Has64BitPtr : Predicate<"Subtarget.is64bit()">; -def Has32BitPtr : Predicate<"!Subtarget.is64bit()">; -//===--------------------------------------------------------------------===// -// Custom Operands -//===--------------------------------------------------------------------===// -def brtarget : Operand<OtherVT>; - -//===--------------------------------------------------------------------===// -// Custom Selection DAG Type Profiles -//===--------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Generic Profile Types -//===----------------------------------------------------------------------===// - -def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> - ]>; -def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3> - ]>; -def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [ - SDTCisEltOfVec<1, 0> - ]>; - -//===----------------------------------------------------------------------===// -// Flow Control Profile Types -//===----------------------------------------------------------------------===// -// Branch instruction where second and third are basic blocks -def SDTIL_BRCond : SDTypeProfile<0, 2, [ - SDTCisVT<0, OtherVT> - ]>; - -//===--------------------------------------------------------------------===// -// Custom Selection DAG Nodes -//===--------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Flow Control DAG Nodes -//===----------------------------------------------------------------------===// -def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>; - -//===----------------------------------------------------------------------===// -// Call/Return DAG Nodes -//===----------------------------------------------------------------------===// -def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; - -//===--------------------------------------------------------------------===// -// Instructions -//===--------------------------------------------------------------------===// -// Floating point math functions -def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>; -def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>; - -//===----------------------------------------------------------------------===// -// Integer functions -//===----------------------------------------------------------------------===// -def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp, - [SDNPCommutative, SDNPAssociative]>; - -//===--------------------------------------------------------------------===// -// Custom Pattern DAG Nodes -//===--------------------------------------------------------------------===// -def global_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return isGlobalStore(dyn_cast<StoreSDNode>(N)); -}]>; - -//===----------------------------------------------------------------------===// -// Load pattern fragments -//===----------------------------------------------------------------------===// -// Global address space loads -def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isGlobalLoad(dyn_cast<LoadSDNode>(N)); -}]>; -// Constant address space loads -def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); -}]>; - -//===----------------------------------------------------------------------===// -// Complex addressing mode patterns -//===----------------------------------------------------------------------===// -def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>; -def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>; -def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>; -def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>; - -//===----------------------------------------------------------------------===// -// Instruction format classes -//===----------------------------------------------------------------------===// -class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> -: Instruction { - - let Namespace = "AMDGPU"; - dag OutOperandList = outs; - dag InOperandList = ins; - let Pattern = pattern; - let AsmString = !strconcat(asmstr, "\n"); - let isPseudo = 1; - let Itinerary = NullALU; - bit hasIEEEFlag = 0; - bit hasZeroOpFlag = 0; -} - -//===--------------------------------------------------------------------===// -// Multiclass Instruction formats -//===--------------------------------------------------------------------===// -// Multiclass that handles branch instructions -multiclass BranchConditional<SDNode Op> { - def _i32 : ILFormat<(outs), - (ins brtarget:$target, GPRI32:$src0), - "; i32 Pseudo branch instruction", - [(Op bb:$target, GPRI32:$src0)]>; - def _f32 : ILFormat<(outs), - (ins brtarget:$target, GPRF32:$src0), - "; f32 Pseudo branch instruction", - [(Op bb:$target, GPRF32:$src0)]>; -} - -// Only scalar types should generate flow control -multiclass BranchInstr<string name> { - def _i32 : ILFormat<(outs), (ins GPRI32:$src), - !strconcat(name, " $src"), []>; - def _f32 : ILFormat<(outs), (ins GPRF32:$src), - !strconcat(name, " $src"), []>; -} -// Only scalar types should generate flow control -multiclass BranchInstr2<string name> { - def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1), - !strconcat(name, " $src0, $src1"), []>; - def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1), - !strconcat(name, " $src0, $src1"), []>; -} - -//===--------------------------------------------------------------------===// -// Intrinsics support -//===--------------------------------------------------------------------===// -include "AMDILIntrinsics.td" - -//===--------------------------------------------------------------------===// -// Instructions support -//===--------------------------------------------------------------------===// -//===---------------------------------------------------------------------===// -// Custom Inserter for Branches and returns, this eventually will be a -// seperate pass -//===---------------------------------------------------------------------===// -let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { - def BRANCH : ILFormat<(outs), (ins brtarget:$target), - "; Pseudo unconditional branch instruction", - [(br bb:$target)]>; - defm BRANCH_COND : BranchConditional<IL_brcond>; -} - -//===---------------------------------------------------------------------===// -// Flow and Program control Instructions -//===---------------------------------------------------------------------===// -let isTerminator=1 in { - def SWITCH : ILFormat< (outs), (ins GPRI32:$src), - !strconcat("SWITCH", " $src"), []>; - def CASE : ILFormat< (outs), (ins GPRI32:$src), - !strconcat("CASE", " $src"), []>; - def BREAK : ILFormat< (outs), (ins), - "BREAK", []>; - def CONTINUE : ILFormat< (outs), (ins), - "CONTINUE", []>; - def DEFAULT : ILFormat< (outs), (ins), - "DEFAULT", []>; - def ELSE : ILFormat< (outs), (ins), - "ELSE", []>; - def ENDSWITCH : ILFormat< (outs), (ins), - "ENDSWITCH", []>; - def ENDMAIN : ILFormat< (outs), (ins), - "ENDMAIN", []>; - def END : ILFormat< (outs), (ins), - "END", []>; - def ENDFUNC : ILFormat< (outs), (ins), - "ENDFUNC", []>; - def ENDIF : ILFormat< (outs), (ins), - "ENDIF", []>; - def WHILELOOP : ILFormat< (outs), (ins), - "WHILE", []>; - def ENDLOOP : ILFormat< (outs), (ins), - "ENDLOOP", []>; - def FUNC : ILFormat< (outs), (ins), - "FUNC", []>; - def RETDYN : ILFormat< (outs), (ins), - "RET_DYN", []>; - // This opcode has custom swizzle pattern encoded in Swizzle Encoder - defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">; - // This opcode has custom swizzle pattern encoded in Swizzle Encoder - defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">; - // This opcode has custom swizzle pattern encoded in Swizzle Encoder - defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; - // This opcode has custom swizzle pattern encoded in Swizzle Encoder - defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; - // This opcode has custom swizzle pattern encoded in Swizzle Encoder - defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; - // This opcode has custom swizzle pattern encoded in Swizzle Encoder - defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; - defm IFC : BranchInstr2<"IFC">; - defm BREAKC : BranchInstr2<"BREAKC">; - defm CONTINUEC : BranchInstr2<"CONTINUEC">; -} diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp deleted file mode 100644 index 23df3822a5a..00000000000 --- a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp +++ /dev/null @@ -1,93 +0,0 @@ -//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file contains the AMDIL Implementation of the IntrinsicInfo class. -// -//===-----------------------------------------------------------------------===// - -#include "AMDILIntrinsicInfo.h" -#include "AMDIL.h" -#include "AMDGPUSubtarget.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" - -using namespace llvm; - -#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN -#include "AMDGPUGenIntrinsics.inc" -#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN - -AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm) - : TargetIntrinsicInfo() -{ -} - -std::string -AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, - unsigned int numTys) const -{ - static const char* const names[] = { -#define GET_INTRINSIC_NAME_TABLE -#include "AMDGPUGenIntrinsics.inc" -#undef GET_INTRINSIC_NAME_TABLE - }; - - //assert(!isOverloaded(IntrID) - //&& "AMDGPU Intrinsics are not overloaded"); - if (IntrID < Intrinsic::num_intrinsics) { - return 0; - } - assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics - && "Invalid intrinsic ID"); - - std::string Result(names[IntrID - Intrinsic::num_intrinsics]); - return Result; -} - -unsigned int -AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const -{ -#define GET_FUNCTION_RECOGNIZER -#include "AMDGPUGenIntrinsics.inc" -#undef GET_FUNCTION_RECOGNIZER - AMDGPUIntrinsic::ID IntrinsicID - = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; - IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name); - - if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) { - return IntrinsicID; - } - return 0; -} - -bool -AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const -{ - // Overload Table -#define GET_INTRINSIC_OVERLOAD_TABLE -#include "AMDGPUGenIntrinsics.inc" -#undef GET_INTRINSIC_OVERLOAD_TABLE -} - -/// This defines the "getAttributes(ID id)" method. -#define GET_INTRINSIC_ATTRIBUTES -#include "AMDGPUGenIntrinsics.inc" -#undef GET_INTRINSIC_ATTRIBUTES - -Function* -AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - Type **Tys, - unsigned numTys) const -{ - //Silence a warning - AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID); - (void)List; - assert(!"Not implemented"); -} diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h deleted file mode 100644 index 9ae3d4dc112..00000000000 --- a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h +++ /dev/null @@ -1,47 +0,0 @@ -//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface for the AMDIL Implementation of the Intrinsic Info class. -// -//===-----------------------------------------------------------------------===// -#ifndef _AMDIL_INTRINSICS_H_ -#define _AMDIL_INTRINSICS_H_ - -#include "llvm/Intrinsics.h" -#include "llvm/Target/TargetIntrinsicInfo.h" - -namespace llvm { - class TargetMachine; - namespace AMDGPUIntrinsic { - enum ID { - last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1, -#define GET_INTRINSIC_ENUM_VALUES -#include "AMDGPUGenIntrinsics.inc" -#undef GET_INTRINSIC_ENUM_VALUES - , num_AMDGPU_intrinsics - }; - - } - - - class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo { - public: - AMDGPUIntrinsicInfo(TargetMachine *tm); - std::string getName(unsigned int IntrId, Type **Tys = 0, - unsigned int numTys = 0) const; - unsigned int lookupName(const char *Name, unsigned int Len) const; - bool isOverloaded(unsigned int IID) const; - Function *getDeclaration(Module *M, unsigned int ID, - Type **Tys = 0, - unsigned int numTys = 0) const; - }; // AMDGPUIntrinsicInfo -} - -#endif // _AMDIL_INTRINSICS_H_ - diff --git a/src/gallium/drivers/radeon/AMDILIntrinsics.td b/src/gallium/drivers/radeon/AMDILIntrinsics.td deleted file mode 100644 index 3f9e20f0c85..00000000000 --- a/src/gallium/drivers/radeon/AMDILIntrinsics.td +++ /dev/null @@ -1,242 +0,0 @@ -//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file defines all of the amdil-specific intrinsics -// -//===---------------------------------------------------------------===// -//===--------------------------------------------------------------------===// -// Intrinsic classes -// Generic versions of the above classes but for Target specific intrinsics -// instead of SDNode patterns. -//===--------------------------------------------------------------------===// -let TargetPrefix = "AMDIL", isTarget = 1 in { - class VoidIntLong : - Intrinsic<[llvm_i64_ty], [], []>; - class VoidIntInt : - Intrinsic<[llvm_i32_ty], [], []>; - class VoidIntBool : - Intrinsic<[llvm_i32_ty], [], []>; - class UnaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; - class UnaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - class ConvertIntFTOI : - Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; - class ConvertIntITOF : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; - class UnaryIntNoRetInt : - Intrinsic<[], [llvm_anyint_ty], []>; - class UnaryIntNoRetFloat : - Intrinsic<[], [llvm_anyfloat_ty], []>; - class BinaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class BinaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class BinaryIntNoRetInt : - Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>; - class BinaryIntNoRetFloat : - Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>; - class TernaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class TernaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class QuaternaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class UnaryAtomicInt : - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class BinaryAtomicInt : - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class TernaryAtomicInt : - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; - class UnaryAtomicIntNoRet : - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class BinaryAtomicIntNoRet : - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class TernaryAtomicIntNoRet : - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; -} - -let TargetPrefix = "AMDIL", isTarget = 1 in { - def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt; - - def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">, - TernaryIntInt; - def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">, - TernaryIntInt; - def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">, - UnaryIntInt; - def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">, - UnaryIntInt; - def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">, - UnaryIntInt; - def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">, - UnaryIntInt; - def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">, - UnaryIntInt; - def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">, - TernaryIntInt; - def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">, - TernaryIntInt; - def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">, - QuaternaryIntInt; - def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">, - TernaryIntInt; - def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, - BinaryIntInt; - def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, - TernaryIntInt; - def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">, - TernaryIntInt; - def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">, - TernaryIntFloat; - def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, - BinaryIntInt; - def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, - BinaryIntInt; - def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">, - BinaryIntInt; - def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">, - BinaryIntInt; - def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">, - BinaryIntInt; - def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, - BinaryIntInt; - def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">, - TernaryIntInt; - def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">, - TernaryIntInt; - def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, - BinaryIntInt; - def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, - BinaryIntInt; - def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">, - BinaryIntInt; - def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">, - BinaryIntInt; - def int_AMDIL_min : GCCBuiltin<"__amdil_min">, - BinaryIntFloat; - def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">, - BinaryIntInt; - def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">, - BinaryIntInt; - def int_AMDIL_max : GCCBuiltin<"__amdil_max">, - BinaryIntFloat; - def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">, - TernaryIntInt; - def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">, - TernaryIntInt; - def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">, - TernaryIntInt; - def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">, - UnaryIntFloat; - def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">, - TernaryIntFloat; - def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">, - UnaryIntFloat; - def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">, - UnaryIntFloat; - def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">, - UnaryIntFloat; - def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">, - UnaryIntFloat; - def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">, - UnaryIntFloat; - def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">, - UnaryIntFloat; - def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">, - UnaryIntFloat; - def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">, - UnaryIntFloat; - def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">, - UnaryIntFloat; - def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">, - UnaryIntFloat; - def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">, - UnaryIntFloat; - def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">, - UnaryIntFloat; - def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat; - def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat; - def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt; - def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">, - UnaryIntFloat; - def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">, - UnaryIntFloat; - def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">, - UnaryIntFloat; - def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">, - UnaryIntFloat; - def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">, - UnaryIntFloat; - def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">, - UnaryIntFloat; - def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">, - UnaryIntFloat; - def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">, - UnaryIntFloat; - def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">, - TernaryIntFloat; - def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">, - UnaryIntFloat; - def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">, - UnaryIntFloat; - def int_AMDIL_length : GCCBuiltin<"__amdil_length">, - UnaryIntFloat; - def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">, - TernaryIntFloat; - def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">, - Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty, llvm_i32_ty], []>; - - def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">, - Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>; - def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">, - Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>; - def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; - def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">, - ConvertIntITOF; - def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">, - ConvertIntFTOI; - def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>; - def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">, - ConvertIntITOF; - def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">, - ConvertIntITOF; - def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">, - ConvertIntITOF; - def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">, - ConvertIntITOF; - def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">, - Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, - llvm_v2f32_ty, llvm_float_ty], []>; - def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">, - Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, - llvm_v2f32_ty], []>; - def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">, - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], []>; - def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">, - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], []>; -} diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp deleted file mode 100644 index 0ebbc9d1e06..00000000000 --- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDILNIDevice.h" -#include "AMDILEvergreenDevice.h" -#include "AMDGPUSubtarget.h" - -using namespace llvm; - -AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST) - : AMDGPUEvergreenDevice(ST) -{ - std::string name = ST->getDeviceName(); - if (name == "caicos") { - mDeviceFlag = OCL_DEVICE_CAICOS; - } else if (name == "turks") { - mDeviceFlag = OCL_DEVICE_TURKS; - } else if (name == "cayman") { - mDeviceFlag = OCL_DEVICE_CAYMAN; - } else { - mDeviceFlag = OCL_DEVICE_BARTS; - } -} -AMDGPUNIDevice::~AMDGPUNIDevice() -{ -} - -size_t -AMDGPUNIDevice::getMaxLDSSize() const -{ - if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { - return MAX_LDS_SIZE_900; - } else { - return 0; - } -} - -uint32_t -AMDGPUNIDevice::getGeneration() const -{ - return AMDGPUDeviceInfo::HD6XXX; -} - - -AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST) - : AMDGPUNIDevice(ST) -{ - setCaps(); -} - -AMDGPUCaymanDevice::~AMDGPUCaymanDevice() -{ -} - -void -AMDGPUCaymanDevice::setCaps() -{ - if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { - mHWBits.set(AMDGPUDeviceInfo::DoubleOps); - mHWBits.set(AMDGPUDeviceInfo::FMA); - } - mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps); - mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps); - mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); -} - diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.h b/src/gallium/drivers/radeon/AMDILNIDevice.h deleted file mode 100644 index 387f7d1c3b7..00000000000 --- a/src/gallium/drivers/radeon/AMDILNIDevice.h +++ /dev/null @@ -1,59 +0,0 @@ -//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface for the subtarget data classes. -// -//===---------------------------------------------------------------------===// -// This file will define the interface that each generation needs to -// implement in order to correctly answer queries on the capabilities of the -// specific hardware. -//===---------------------------------------------------------------------===// -#ifndef _AMDILNIDEVICE_H_ -#define _AMDILNIDEVICE_H_ -#include "AMDILEvergreenDevice.h" -#include "AMDGPUSubtarget.h" - -namespace llvm { - class AMDGPUSubtarget; -//===---------------------------------------------------------------------===// -// NI generation of devices and their respective sub classes -//===---------------------------------------------------------------------===// - -// The AMDGPUNIDevice is the base class for all Northern Island series of -// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major -// exception being differences in wavefront size and hardware capabilities. The -// NI devices are all 64 wide wavefronts and also add support for signed 24 bit -// integer operations - - class AMDGPUNIDevice : public AMDGPUEvergreenDevice { - public: - AMDGPUNIDevice(AMDGPUSubtarget*); - virtual ~AMDGPUNIDevice(); - virtual size_t getMaxLDSSize() const; - virtual uint32_t getGeneration() const; - protected: - }; // AMDGPUNIDevice - -// Just as the AMDGPUCypressDevice is the double capable version of the -// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version of -// the AMDGPUNIDevice. The other major difference that is not as useful from -// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the -// NI family is a 5 wide. - - class AMDGPUCaymanDevice: public AMDGPUNIDevice { - public: - AMDGPUCaymanDevice(AMDGPUSubtarget*); - virtual ~AMDGPUCaymanDevice(); - private: - virtual void setCaps(); - }; // AMDGPUCaymanDevice - - static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800; -} // namespace llvm -#endif // _AMDILNIDEVICE_H_ diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp deleted file mode 100644 index f869b332e53..00000000000 --- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp +++ /dev/null @@ -1,1275 +0,0 @@ -//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// - -#include "AMDILDevices.h" -#include "AMDGPUInstrInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Constants.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" - -#include <sstream> - -#if 0 -STATISTIC(PointerAssignments, "Number of dynamic pointer " - "assigments discovered"); -STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); -#endif - -using namespace llvm; -// The Peephole optimization pass is used to do simple last minute optimizations -// that are required for correct code or to remove redundant functions -namespace { - -class OpaqueType; - -class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass { -public: - TargetMachine &TM; - static char ID; - AMDGPUPeepholeOpt(TargetMachine &tm); - ~AMDGPUPeepholeOpt(); - const char *getPassName() const; - bool runOnFunction(Function &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); - void getAnalysisUsage(AnalysisUsage &AU) const; -protected: -private: - // Function to initiate all of the instruction level optimizations. - bool instLevelOptimizations(BasicBlock::iterator *inst); - // Quick check to see if we need to dump all of the pointers into the - // arena. If this is correct, then we set all pointers to exist in arena. This - // is a workaround for aliasing of pointers in a struct/union. - bool dumpAllIntoArena(Function &F); - // Because I don't want to invalidate any pointers while in the - // safeNestedForEachFunction. I push atomic conversions to a vector and handle - // it later. This function does the conversions if required. - void doAtomicConversionIfNeeded(Function &F); - // Because __amdil_is_constant cannot be properly evaluated if - // optimizations are disabled, the call's are placed in a vector - // and evaluated after the __amdil_image* functions are evaluated - // which should allow the __amdil_is_constant function to be - // evaluated correctly. - void doIsConstCallConversionIfNeeded(); - bool mChanged; - bool mDebug; - bool mConvertAtomics; - CodeGenOpt::Level optLevel; - // Run a series of tests to see if we can optimize a CALL instruction. - bool optimizeCallInst(BasicBlock::iterator *bbb); - // A peephole optimization to optimize bit extract sequences. - bool optimizeBitExtract(Instruction *inst); - // A peephole optimization to optimize bit insert sequences. - bool optimizeBitInsert(Instruction *inst); - bool setupBitInsert(Instruction *base, - Instruction *&src, - Constant *&mask, - Constant *&shift); - // Expand the bit field insert instruction on versions of OpenCL that - // don't support it. - bool expandBFI(CallInst *CI); - // Expand the bit field mask instruction on version of OpenCL that - // don't support it. - bool expandBFM(CallInst *CI); - // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in - // this case we need to expand them. These functions check for 24bit functions - // and then expand. - bool isSigned24BitOps(CallInst *CI); - void expandSigned24BitOps(CallInst *CI); - // One optimization that can occur is that if the required workgroup size is - // specified then the result of get_local_size is known at compile time and - // can be returned accordingly. - bool isRWGLocalOpt(CallInst *CI); - // On northern island cards, the division is slightly less accurate than on - // previous generations, so we need to utilize a more accurate division. So we - // can translate the accurate divide to a normal divide on all other cards. - bool convertAccurateDivide(CallInst *CI); - void expandAccurateDivide(CallInst *CI); - // If the alignment is set incorrectly, it can produce really inefficient - // code. This checks for this scenario and fixes it if possible. - bool correctMisalignedMemOp(Instruction *inst); - - // If we are in no opt mode, then we need to make sure that - // local samplers are properly propagated as constant propagation - // doesn't occur and we need to know the value of kernel defined - // samplers at compile time. - bool propagateSamplerInst(CallInst *CI); - - // Helper functions - - // Group of functions that recursively calculate the size of a structure based - // on it's sub-types. - size_t getTypeSize(Type * const T, bool dereferencePtr = false); - size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); - size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); - size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); - size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); - size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); - size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); - size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); - - LLVMContext *mCTX; - Function *mF; - const AMDGPUSubtarget *mSTM; - SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs; - SmallVector<CallInst *, 16> isConstVec; -}; // class AMDGPUPeepholeOpt - char AMDGPUPeepholeOpt::ID = 0; - -// A template function that has two levels of looping before calling the -// function with a pointer to the current iterator. -template<class InputIterator, class SecondIterator, class Function> -Function safeNestedForEach(InputIterator First, InputIterator Last, - SecondIterator S, Function F) -{ - for ( ; First != Last; ++First) { - SecondIterator sf, sl; - for (sf = First->begin(), sl = First->end(); - sf != sl; ) { - if (!F(&sf)) { - ++sf; - } - } - } - return F; -} - -} // anonymous namespace - -namespace llvm { - FunctionPass * - createAMDGPUPeepholeOpt(TargetMachine &tm) - { - return new AMDGPUPeepholeOpt(tm); - } -} // llvm namespace - -AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) - : FunctionPass(ID), TM(tm) -{ - mDebug = false; - optLevel = TM.getOptLevel(); - -} - -AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() -{ -} - -const char * -AMDGPUPeepholeOpt::getPassName() const -{ - return "AMDGPU PeepHole Optimization Pass"; -} - -bool -containsPointerType(Type *Ty) -{ - if (!Ty) { - return false; - } - switch(Ty->getTypeID()) { - default: - return false; - case Type::StructTyID: { - const StructType *ST = dyn_cast<StructType>(Ty); - for (StructType::element_iterator stb = ST->element_begin(), - ste = ST->element_end(); stb != ste; ++stb) { - if (!containsPointerType(*stb)) { - continue; - } - return true; - } - break; - } - case Type::VectorTyID: - case Type::ArrayTyID: - return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType()); - case Type::PointerTyID: - return true; - }; - return false; -} - -bool -AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) -{ - bool dumpAll = false; - for (Function::const_arg_iterator cab = F.arg_begin(), - cae = F.arg_end(); cab != cae; ++cab) { - const Argument *arg = cab; - const PointerType *PT = dyn_cast<PointerType>(arg->getType()); - if (!PT) { - continue; - } - Type *DereferencedType = PT->getElementType(); - if (!dyn_cast<StructType>(DereferencedType) - ) { - continue; - } - if (!containsPointerType(DereferencedType)) { - continue; - } - // FIXME: Because a pointer inside of a struct/union may be aliased to - // another pointer we need to take the conservative approach and place all - // pointers into the arena until more advanced detection is implemented. - dumpAll = true; - } - return dumpAll; -} -void -AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() -{ - if (isConstVec.empty()) { - return; - } - for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { - CallInst *CI = isConstVec[x]; - Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) - : ConstantInt::get(aType, 0); - CI->replaceAllUsesWith(Val); - CI->eraseFromParent(); - } - isConstVec.clear(); -} -void -AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) -{ - // Don't do anything if we don't have any atomic operations. - if (atomicFuncs.empty()) { - return; - } - // Change the function name for the atomic if it is required - uint32_t size = atomicFuncs.size(); - for (uint32_t x = 0; x < size; ++x) { - atomicFuncs[x].first->setOperand( - atomicFuncs[x].first->getNumOperands()-1, - atomicFuncs[x].second); - - } - mChanged = true; - if (mConvertAtomics) { - return; - } -} - -bool -AMDGPUPeepholeOpt::runOnFunction(Function &MF) -{ - mChanged = false; - mF = &MF; - mSTM = &TM.getSubtarget<AMDGPUSubtarget>(); - if (mDebug) { - MF.dump(); - } - mCTX = &MF.getType()->getContext(); - mConvertAtomics = true; - safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), - std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations), - this)); - - doAtomicConversionIfNeeded(MF); - doIsConstCallConversionIfNeeded(); - - if (mDebug) { - MF.dump(); - } - return mChanged; -} - -bool -AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) -{ - Instruction *inst = (*bbb); - CallInst *CI = dyn_cast<CallInst>(inst); - if (!CI) { - return false; - } - if (isSigned24BitOps(CI)) { - expandSigned24BitOps(CI); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - if (propagateSamplerInst(CI)) { - return false; - } - if (expandBFI(CI) || expandBFM(CI)) { - ++(*bbb); - CI->eraseFromParent(); - return true; - } - if (convertAccurateDivide(CI)) { - expandAccurateDivide(CI); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - - StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); - if (calleeName.startswith("__amdil_is_constant")) { - // If we do not have optimizations, then this - // cannot be properly evaluated, so we add the - // call instruction to a vector and process - // them at the end of processing after the - // samplers have been correctly handled. - if (optLevel == CodeGenOpt::None) { - isConstVec.push_back(CI); - return false; - } else { - Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) - : ConstantInt::get(aType, 0); - CI->replaceAllUsesWith(Val); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - } - - if (calleeName.equals("__amdil_is_asic_id_i32")) { - ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = CV; - if (Val) { - Val = ConstantInt::get(aType, - mSTM->device()->getDeviceFlag() & CV->getZExtValue()); - } else { - Val = ConstantInt::get(aType, 0); - } - CI->replaceAllUsesWith(Val); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1)); - if (!F) { - return false; - } - if (F->getName().startswith("__atom") && !CI->getNumUses() - && F->getName().find("_xchg") == StringRef::npos) { - std::string buffer(F->getName().str() + "_noret"); - F = dyn_cast<Function>( - F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); - atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F)); - } - - if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) - && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) { - return false; - } - if (!mConvertAtomics) { - return false; - } - StringRef name = F->getName(); - if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { - mConvertAtomics = false; - } - return false; -} - -bool -AMDGPUPeepholeOpt::setupBitInsert(Instruction *base, - Instruction *&src, - Constant *&mask, - Constant *&shift) -{ - if (!base) { - if (mDebug) { - dbgs() << "Null pointer passed into function.\n"; - } - return false; - } - bool andOp = false; - if (base->getOpcode() == Instruction::Shl) { - shift = dyn_cast<Constant>(base->getOperand(1)); - } else if (base->getOpcode() == Instruction::And) { - mask = dyn_cast<Constant>(base->getOperand(1)); - andOp = true; - } else { - if (mDebug) { - dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; - } - // If the base is neither a Shl or a And, we don't fit any of the patterns above. - return false; - } - src = dyn_cast<Instruction>(base->getOperand(0)); - if (!src) { - if (mDebug) { - dbgs() << "Failed setup since the base operand is not an instruction!\n"; - } - return false; - } - // If we find an 'and' operation, then we don't need to - // find the next operation as we already know the - // bits that are valid at this point. - if (andOp) { - return true; - } - if (src->getOpcode() == Instruction::Shl && !shift) { - shift = dyn_cast<Constant>(src->getOperand(1)); - src = dyn_cast<Instruction>(src->getOperand(0)); - } else if (src->getOpcode() == Instruction::And && !mask) { - mask = dyn_cast<Constant>(src->getOperand(1)); - } - if (!mask && !shift) { - if (mDebug) { - dbgs() << "Failed setup since both mask and shift are NULL!\n"; - } - // Did not find a constant mask or a shift. - return false; - } - return true; -} -bool -AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) -{ - if (!inst) { - return false; - } - if (!inst->isBinaryOp()) { - return false; - } - if (inst->getOpcode() != Instruction::Or) { - return false; - } - if (optLevel == CodeGenOpt::None) { - return false; - } - // We want to do an optimization on a sequence of ops that in the end equals a - // single ISA instruction. - // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) - // Some simplified versions of this pattern are as follows: - // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 - // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E - // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B - // (A & B) | (D << F) when (1 << F) >= B - // (A << C) | (D & E) when (1 << C) >= E - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { - // The HD4XXX hardware doesn't support the ubit_insert instruction. - return false; - } - Type *aType = inst->getType(); - bool isVector = aType->isVectorTy(); - int numEle = 1; - // This optimization only works on 32bit integers. - if (aType->getScalarType() - != Type::getInt32Ty(inst->getContext())) { - return false; - } - if (isVector) { - const VectorType *VT = dyn_cast<VectorType>(aType); - numEle = VT->getNumElements(); - // We currently cannot support more than 4 elements in a intrinsic and we - // cannot support Vec3 types. - if (numEle > 4 || numEle == 3) { - return false; - } - } - // TODO: Handle vectors. - if (isVector) { - if (mDebug) { - dbgs() << "!!! Vectors are not supported yet!\n"; - } - return false; - } - Instruction *LHSSrc = NULL, *RHSSrc = NULL; - Constant *LHSMask = NULL, *RHSMask = NULL; - Constant *LHSShift = NULL, *RHSShift = NULL; - Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0)); - Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1)); - if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { - if (mDebug) { - dbgs() << "Found an OR Operation that failed setup!\n"; - inst->dump(); - if (LHS) { LHS->dump(); } - if (LHSSrc) { LHSSrc->dump(); } - if (LHSMask) { LHSMask->dump(); } - if (LHSShift) { LHSShift->dump(); } - } - // There was an issue with the setup for BitInsert. - return false; - } - if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { - if (mDebug) { - dbgs() << "Found an OR Operation that failed setup!\n"; - inst->dump(); - if (RHS) { RHS->dump(); } - if (RHSSrc) { RHSSrc->dump(); } - if (RHSMask) { RHSMask->dump(); } - if (RHSShift) { RHSShift->dump(); } - } - // There was an issue with the setup for BitInsert. - return false; - } - if (mDebug) { - dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; - dbgs() << "Op: "; inst->dump(); - dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } - } - Constant *offset = NULL; - Constant *width = NULL; - int32_t lhsMaskVal = 0, rhsMaskVal = 0; - int32_t lhsShiftVal = 0, rhsShiftVal = 0; - int32_t lhsMaskWidth = 0, rhsMaskWidth = 0; - int32_t lhsMaskOffset = 0, rhsMaskOffset = 0; - lhsMaskVal = (int32_t)(LHSMask - ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0); - rhsMaskVal = (int32_t)(RHSMask - ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0); - lhsShiftVal = (int32_t)(LHSShift - ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0); - rhsShiftVal = (int32_t)(RHSShift - ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0); - lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; - rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; - lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; - rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; - // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). - if (mDebug) { - dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")"); - dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ; - dbgs() << (RHSMask ? " & E)" : ")"); - dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n"); - dbgs() << "A = LHSSrc\t\tD = RHSSrc \n"; - dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n"; - dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n"; - dbgs() << "width(B) = " << lhsMaskWidth; - dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n"; - dbgs() << "offset(B) = " << lhsMaskOffset; - dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n"; - dbgs() << "Constraints: \n"; - dbgs() << "\t(1) B ^ E == 0\n"; - dbgs() << "\t(2-LHS) B is a mask\n"; - dbgs() << "\t(2-LHS) E is a mask\n"; - dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n"; - dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n"; - } - if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { - if (mDebug) { - dbgs() << lhsMaskVal << " ^ " << rhsMaskVal; - dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n"; - dbgs() << "Failed constraint 1!\n"; - } - return false; - } - if (mDebug) { - dbgs() << "LHS = " << lhsMaskOffset << ""; - dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = "; - dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)); - dbgs() << "\nRHS = " << rhsMaskOffset << ""; - dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = "; - dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)); - dbgs() << "\n"; - } - if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { - offset = ConstantInt::get(aType, lhsMaskOffset, false); - width = ConstantInt::get(aType, lhsMaskWidth, false); - RHSSrc = RHS; - if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { - if (mDebug) { - dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n"; - dbgs() << "Failed constraint 2!\n"; - } - return false; - } - if (!LHSShift) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", LHS); - } else if (lhsShiftVal != lhsMaskOffset) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", LHS); - } - if (mDebug) { - dbgs() << "Optimizing LHS!\n"; - } - } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { - offset = ConstantInt::get(aType, rhsMaskOffset, false); - width = ConstantInt::get(aType, rhsMaskWidth, false); - LHSSrc = RHSSrc; - RHSSrc = LHS; - if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { - if (mDebug) { - dbgs() << "Non-Mask: " << rhsMaskVal << "\n"; - dbgs() << "Failed constraint 2!\n"; - } - return false; - } - if (!RHSShift) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", RHS); - } else if (rhsShiftVal != rhsMaskOffset) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", RHS); - } - if (mDebug) { - dbgs() << "Optimizing RHS!\n"; - } - } else { - if (mDebug) { - dbgs() << "Failed constraint 3!\n"; - } - return false; - } - if (mDebug) { - dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } - } - if (!offset || !width) { - if (mDebug) { - dbgs() << "Either width or offset are NULL, failed detection!\n"; - } - return false; - } - // Lets create the function signature. - std::vector<Type *> callTypes; - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "__amdil_ubit_insert"; - if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } - Function *Func = - dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); - Value *Operands[4] = { - width, - offset, - LHSSrc, - RHSSrc - }; - CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); - if (mDebug) { - dbgs() << "Old Inst: "; - inst->dump(); - dbgs() << "New Inst: "; - CI->dump(); - dbgs() << "\n\n"; - } - CI->insertBefore(inst); - inst->replaceAllUsesWith(CI); - return true; -} - -bool -AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) -{ - if (!inst) { - return false; - } - if (!inst->isBinaryOp()) { - return false; - } - if (inst->getOpcode() != Instruction::And) { - return false; - } - if (optLevel == CodeGenOpt::None) { - return false; - } - // We want to do some simple optimizations on Shift right/And patterns. The - // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a - // value smaller than 32 and C is a mask. If C is a constant value, then the - // following transformation can occur. For signed integers, it turns into the - // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned - // integers, it turns into the function call dst = - // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract - // can be found in Section 7.9 of the ATI IL spec of the stream SDK for - // Evergreen hardware. - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { - // This does not work on HD4XXX hardware. - return false; - } - Type *aType = inst->getType(); - bool isVector = aType->isVectorTy(); - - // XXX Support vector types - if (isVector) { - return false; - } - int numEle = 1; - // This only works on 32bit integers - if (aType->getScalarType() - != Type::getInt32Ty(inst->getContext())) { - return false; - } - if (isVector) { - const VectorType *VT = dyn_cast<VectorType>(aType); - numEle = VT->getNumElements(); - // We currently cannot support more than 4 elements in a intrinsic and we - // cannot support Vec3 types. - if (numEle > 4 || numEle == 3) { - return false; - } - } - BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0)); - // If the first operand is not a shift instruction, then we can return as it - // doesn't match this pattern. - if (!ShiftInst || !ShiftInst->isShift()) { - return false; - } - // If we are a shift left, then we need don't match this pattern. - if (ShiftInst->getOpcode() == Instruction::Shl) { - return false; - } - bool isSigned = ShiftInst->isArithmeticShift(); - Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1)); - Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1)); - // Lets make sure that the shift value and the and mask are constant integers. - if (!AndMask || !ShrVal) { - return false; - } - Constant *newMaskConst; - Constant *shiftValConst; - if (isVector) { - // Handle the vector case - std::vector<Constant *> maskVals; - std::vector<Constant *> shiftVals; - ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask); - ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal); - Type *scalarType = AndMaskVec->getType()->getScalarType(); - assert(AndMaskVec->getNumOperands() == - ShrValVec->getNumOperands() && "cannot have a " - "combination where the number of elements to a " - "shift and an and are different!"); - for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { - ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x)); - ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x)); - if (!AndCI || !ShiftIC) { - return false; - } - uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); - if (!isMask_32(maskVal)) { - return false; - } - maskVal = (uint32_t)CountTrailingOnes_32(maskVal); - uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); - // If the mask or shiftval is greater than the bitcount, then break out. - if (maskVal >= 32 || shiftVal >= 32) { - return false; - } - // If the mask val is greater than the the number of original bits left - // then this optimization is invalid. - if (maskVal > (32 - shiftVal)) { - return false; - } - maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); - shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); - } - newMaskConst = ConstantVector::get(maskVals); - shiftValConst = ConstantVector::get(shiftVals); - } else { - // Handle the scalar case - uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue(); - // This must be a mask value where all lower bits are set to 1 and then any - // bit higher is set to 0. - if (!isMask_32(maskVal)) { - return false; - } - maskVal = (uint32_t)CountTrailingOnes_32(maskVal); - // Count the number of bits set in the mask, this is the width of the - // resulting bit set that is extracted from the source value. - uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue(); - // If the mask or shift val is greater than the bitcount, then break out. - if (maskVal >= 32 || shiftVal >= 32) { - return false; - } - // If the mask val is greater than the the number of original bits left then - // this optimization is invalid. - if (maskVal > (32 - shiftVal)) { - return false; - } - newMaskConst = ConstantInt::get(aType, maskVal, isSigned); - shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); - } - // Lets create the function signature. - std::vector<Type *> callTypes; - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "llvm.AMDIL.bit.extract.u32"; - if (isVector) { - name += ".v" + itostr(numEle) + "i32"; - } else { - name += "."; - } - // Lets create the function. - Function *Func = - dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); - Value *Operands[3] = { - ShiftInst->getOperand(0), - shiftValConst, - newMaskConst - }; - // Lets create the Call with the operands - CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); - CI->setDoesNotAccessMemory(); - CI->insertBefore(inst); - inst->replaceAllUsesWith(CI); - return true; -} - -bool -AMDGPUPeepholeOpt::expandBFI(CallInst *CI) -{ - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - if (!LHS->getName().startswith("__amdil_bfi")) { - return false; - } - Type* type = CI->getOperand(0)->getType(); - Constant *negOneConst = NULL; - if (type->isVectorTy()) { - std::vector<Constant *> negOneVals; - negOneConst = ConstantInt::get(CI->getContext(), - APInt(32, StringRef("-1"), 10)); - for (size_t x = 0, - y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { - negOneVals.push_back(negOneConst); - } - negOneConst = ConstantVector::get(negOneVals); - } else { - negOneConst = ConstantInt::get(CI->getContext(), - APInt(32, StringRef("-1"), 10)); - } - // __amdil_bfi => (A & B) | (~A & C) - BinaryOperator *lhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(0), - CI->getOperand(1), "bfi_and", CI); - BinaryOperator *rhs = - BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, - "bfi_not", CI); - rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), - "bfi_and", CI); - lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); - CI->replaceAllUsesWith(lhs); - return true; -} - -bool -AMDGPUPeepholeOpt::expandBFM(CallInst *CI) -{ - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - if (!LHS->getName().startswith("__amdil_bfm")) { - return false; - } - // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) - Constant *newMaskConst = NULL; - Constant *newShiftConst = NULL; - Type* type = CI->getOperand(0)->getType(); - if (type->isVectorTy()) { - std::vector<Constant*> newMaskVals, newShiftVals; - newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); - newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); - for (size_t x = 0, - y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { - newMaskVals.push_back(newMaskConst); - newShiftVals.push_back(newShiftConst); - } - newMaskConst = ConstantVector::get(newMaskVals); - newShiftConst = ConstantVector::get(newShiftVals); - } else { - newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); - newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); - } - BinaryOperator *lhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(0), - newMaskConst, "bfm_mask", CI); - lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, - lhs, "bfm_shl", CI); - lhs = BinaryOperator::Create(Instruction::Sub, lhs, - newShiftConst, "bfm_sub", CI); - BinaryOperator *rhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(1), - newMaskConst, "bfm_mask", CI); - lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); - CI->replaceAllUsesWith(lhs); - return true; -} - -bool -AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) -{ - Instruction *inst = (*bbb); - if (optimizeCallInst(bbb)) { - return true; - } - if (optimizeBitExtract(inst)) { - return false; - } - if (optimizeBitInsert(inst)) { - return false; - } - if (correctMisalignedMemOp(inst)) { - return false; - } - return false; -} -bool -AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) -{ - LoadInst *linst = dyn_cast<LoadInst>(inst); - StoreInst *sinst = dyn_cast<StoreInst>(inst); - unsigned alignment; - Type* Ty = inst->getType(); - if (linst) { - alignment = linst->getAlignment(); - Ty = inst->getType(); - } else if (sinst) { - alignment = sinst->getAlignment(); - Ty = sinst->getValueOperand()->getType(); - } else { - return false; - } - unsigned size = getTypeSize(Ty); - if (size == alignment || size < alignment) { - return false; - } - if (!Ty->isStructTy()) { - return false; - } - if (alignment < 4) { - if (linst) { - linst->setAlignment(0); - return true; - } else if (sinst) { - sinst->setAlignment(0); - return true; - } - } - return false; -} -bool -AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) -{ - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - std::string namePrefix = LHS->getName().substr(0, 14); - if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" - && namePrefix != "__amdil__imul24_high") { - return false; - } - if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { - return false; - } - return true; -} - -void -AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) -{ - assert(isSigned24BitOps(CI) && "Must be a " - "signed 24 bit operation to call this function!"); - Value *LHS = CI->getOperand(CI->getNumOperands()-1); - // On 7XX and 8XX we do not have signed 24bit, so we need to - // expand it to the following: - // imul24 turns into 32bit imul - // imad24 turns into 32bit imad - // imul24_high turns into 32bit imulhigh - if (LHS->getName().substr(0, 14) == "__amdil_imad24") { - Type *aType = CI->getOperand(0)->getType(); - bool isVector = aType->isVectorTy(); - int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; - std::vector<Type*> callTypes; - callTypes.push_back(CI->getOperand(0)->getType()); - callTypes.push_back(CI->getOperand(1)->getType()); - callTypes.push_back(CI->getOperand(2)->getType()); - FunctionType *funcType = - FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); - std::string name = "__amdil_imad"; - if (isVector) { - name += "_v" + itostr(numEle) + "i32"; - } else { - name += "_i32"; - } - Function *Func = dyn_cast<Function>( - CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); - Value *Operands[3] = { - CI->getOperand(0), - CI->getOperand(1), - CI->getOperand(2) - }; - CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); - nCI->insertBefore(CI); - CI->replaceAllUsesWith(nCI); - } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { - BinaryOperator *mulOp = - BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), - CI->getOperand(1), "imul24", CI); - CI->replaceAllUsesWith(mulOp); - } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { - Type *aType = CI->getOperand(0)->getType(); - - bool isVector = aType->isVectorTy(); - int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; - std::vector<Type*> callTypes; - callTypes.push_back(CI->getOperand(0)->getType()); - callTypes.push_back(CI->getOperand(1)->getType()); - FunctionType *funcType = - FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); - std::string name = "__amdil_imul_high"; - if (isVector) { - name += "_v" + itostr(numEle) + "i32"; - } else { - name += "_i32"; - } - Function *Func = dyn_cast<Function>( - CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); - Value *Operands[2] = { - CI->getOperand(0), - CI->getOperand(1) - }; - CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); - nCI->insertBefore(CI); - CI->replaceAllUsesWith(nCI); - } -} - -bool -AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) -{ - return (CI != NULL - && CI->getOperand(CI->getNumOperands() - 1)->getName() - == "__amdil_get_local_size_int"); -} - -bool -AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) -{ - if (!CI) { - return false; - } - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX - && (mSTM->getDeviceName() == "cayman")) { - return false; - } - return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) - == "__amdil_improved_div"; -} - -void -AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) -{ - assert(convertAccurateDivide(CI) - && "expanding accurate divide can only happen if it is expandable!"); - BinaryOperator *divOp = - BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), - CI->getOperand(1), "fdiv32", CI); - CI->replaceAllUsesWith(divOp); -} - -bool -AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) -{ - if (optLevel != CodeGenOpt::None) { - return false; - } - - if (!CI) { - return false; - } - - unsigned funcNameIdx = 0; - funcNameIdx = CI->getNumOperands() - 1; - StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); - if (calleeName != "__amdil_image2d_read_norm" - && calleeName != "__amdil_image2d_read_unnorm" - && calleeName != "__amdil_image3d_read_norm" - && calleeName != "__amdil_image3d_read_unnorm") { - return false; - } - - unsigned samplerIdx = 2; - samplerIdx = 1; - Value *sampler = CI->getOperand(samplerIdx); - LoadInst *lInst = dyn_cast<LoadInst>(sampler); - if (!lInst) { - return false; - } - - if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { - return false; - } - - GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand()); - // If we are loading from what is not a global value, then we - // fail and return. - if (!gv) { - return false; - } - - // If we don't have an initializer or we have an initializer and - // the initializer is not a 32bit integer, we fail. - if (!gv->hasInitializer() - || !gv->getInitializer()->getType()->isIntegerTy(32)) { - return false; - } - - // Now that we have the global variable initializer, lets replace - // all uses of the load instruction with the samplerVal and - // reparse the __amdil_is_constant() function. - Constant *samplerVal = gv->getInitializer(); - lInst->replaceAllUsesWith(samplerVal); - return true; -} - -bool -AMDGPUPeepholeOpt::doInitialization(Module &M) -{ - return false; -} - -bool -AMDGPUPeepholeOpt::doFinalization(Module &M) -{ - return false; -} - -void -AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const -{ - AU.addRequired<MachineFunctionAnalysis>(); - FunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); -} - -size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = T->getPrimitiveSizeInBits() >> 3; - break; - case Type::PointerTyID: - size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); - break; - case Type::IntegerTyID: - size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); - break; - case Type::StructTyID: - size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); - break; - case Type::ArrayTyID: - size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); - break; - case Type::FunctionTyID: - size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); - break; - case Type::VectorTyID: - size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); - break; - }; - return size; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, - bool dereferencePtr) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { - curType = *eib; - size += getTypeSize(curType, dereferencePtr); - } - return size; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, - bool dereferencePtr) { - return IT ? (IT->getBitWidth() >> 3) : 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, - bool dereferencePtr) { - assert(0 && "Should not be able to calculate the size of an function type"); - return 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, - bool dereferencePtr) { - return (size_t)(AT ? (getTypeSize(AT->getElementType(), - dereferencePtr) * AT->getNumElements()) - : 0); -} - -size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, - bool dereferencePtr) { - return VT ? (VT->getBitWidth() >> 3) : 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT, - bool dereferencePtr) { - if (!PT) { - return 0; - } - Type *CT = PT->getElementType(); - if (CT->getTypeID() == Type::StructTyID && - PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { - return getTypeSize(dyn_cast<StructType>(CT)); - } else if (dereferencePtr) { - size_t size = 0; - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getTypeSize(PT->getContainedType(x), dereferencePtr); - } - return size; - } else { - return 4; - } -} - -size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT, - bool dereferencePtr) { - //assert(0 && "Should not be able to calculate the size of an opaque type"); - return 4; -} diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.td b/src/gallium/drivers/radeon/AMDILRegisterInfo.td deleted file mode 100644 index 42235ff37a1..00000000000 --- a/src/gallium/drivers/radeon/AMDILRegisterInfo.td +++ /dev/null @@ -1,110 +0,0 @@ -//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Declarations that describe the AMDIL register file -// -//===----------------------------------------------------------------------===// - -class AMDILReg<bits<16> num, string n> : Register<n> { - field bits<16> Value; - let Value = num; - let Namespace = "AMDGPU"; -} - -// We will start with 8 registers for each class before expanding to more -// Since the swizzle is added based on the register class, we can leave it -// off here and just specify different registers for different register classes -def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>; -def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>; -def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>; -def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>; -def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>; -def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>; -def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>; -def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>; -def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>; -def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>; -def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>; -def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>; -def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>; -def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>; -def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>; -def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>; -def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>; -def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>; -def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>; -def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>; - -// All registers between 1000 and 1024 are reserved and cannot be used -// unless commented in this section -// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's -// r1020 is used to hold the frame index for local arrays -// r1019 is used to hold the dynamic stack allocation pointer -// r1018 is used as a temporary register for handwritten code -// r1017 is used as a temporary register for handwritten code -// r1016 is used as a temporary register for load/store code -// r1015 is used as a temporary register for data segment offset -// r1014 is used as a temporary register for store code -// r1013 is used as the section data pointer register -// r1012-r1010 and r1001-r1008 are used for temporary I/O registers -// r1009 is used as the frame pointer register -// r999 is used as the mem register. -// r998 is used as the return address register. -//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>; -//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>; -//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>; -//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>; -//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>; -//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>; -def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>; -def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>; -def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>; -def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>; -def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>; -def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>; -def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>; -def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>; -def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>; -def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>; -def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>; -def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>; -def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>; -def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>; -def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>; -def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>; -def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>; -def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>; -def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>; -def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>; -def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>; -def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>; -def GPRI16 : RegisterClass<"AMDGPU", [i16], 16, - (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> -{ - let AltOrders = [(add (sequence "R%u", 1, 20))]; - let AltOrderSelect = [{ - return 1; - }]; - } -def GPRI32 : RegisterClass<"AMDGPU", [i32], 32, - (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> -{ - let AltOrders = [(add (sequence "R%u", 1, 20))]; - let AltOrderSelect = [{ - return 1; - }]; - } -def GPRF32 : RegisterClass<"AMDGPU", [f32], 32, - (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> -{ - let AltOrders = [(add (sequence "R%u", 1, 20))]; - let AltOrderSelect = [{ - return 1; - }]; - } diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp deleted file mode 100644 index 856b00f894a..00000000000 --- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -#include "AMDILSIDevice.h" -#include "AMDILEvergreenDevice.h" -#include "AMDILNIDevice.h" -#include "AMDGPUSubtarget.h" - -using namespace llvm; - -AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST) - : AMDGPUEvergreenDevice(ST) -{ -} -AMDGPUSIDevice::~AMDGPUSIDevice() -{ -} - -size_t -AMDGPUSIDevice::getMaxLDSSize() const -{ - if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { - return MAX_LDS_SIZE_900; - } else { - return 0; - } -} - -uint32_t -AMDGPUSIDevice::getGeneration() const -{ - return AMDGPUDeviceInfo::HD7XXX; -} - -std::string -AMDGPUSIDevice::getDataLayout() const -{ - return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" - "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" - "-v512:512:512-v1024:1024:1024-v2048:2048:2048" - "-n8:16:32:64"); -} diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h deleted file mode 100644 index 6a684cb6095..00000000000 --- a/src/gallium/drivers/radeon/AMDILSIDevice.h +++ /dev/null @@ -1,45 +0,0 @@ -//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// Interface for the subtarget data classes. -// -//===---------------------------------------------------------------------===// -// This file will define the interface that each generation needs to -// implement in order to correctly answer queries on the capabilities of the -// specific hardware. -//===---------------------------------------------------------------------===// -#ifndef _AMDILSIDEVICE_H_ -#define _AMDILSIDEVICE_H_ -#include "AMDILEvergreenDevice.h" -#include "AMDGPUSubtarget.h" - -namespace llvm { - class AMDGPUSubtarget; -//===---------------------------------------------------------------------===// -// SI generation of devices and their respective sub classes -//===---------------------------------------------------------------------===// - -// The AMDGPUSIDevice is the base class for all Northern Island series of -// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major -// exception being differences in wavefront size and hardware capabilities. The -// SI devices are all 64 wide wavefronts and also add support for signed 24 bit -// integer operations - - class AMDGPUSIDevice : public AMDGPUEvergreenDevice { - public: - AMDGPUSIDevice(AMDGPUSubtarget*); - virtual ~AMDGPUSIDevice(); - virtual size_t getMaxLDSSize() const; - virtual uint32_t getGeneration() const; - virtual std::string getDataLayout() const; - protected: - }; // AMDGPUSIDevice - -} // namespace llvm -#endif // _AMDILSIDEVICE_H_ diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h deleted file mode 100644 index e6666f97705..00000000000 --- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file provides helper macros for expanding case statements. -// -//===----------------------------------------------------------------------===// -#ifndef AMDILUTILITYFUNCTIONS_H_ -#define AMDILUTILITYFUNCTIONS_H_ - -// Macros that are used to help with switch statements for various data types -// However, these macro's do not return anything unlike the second set below. -#define ExpandCaseTo32bitIntTypes(Instr) \ -case Instr##_i32: - -#define ExpandCaseTo32bitIntTruncTypes(Instr) \ -case Instr##_i32i8: \ -case Instr##_i32i16: - -#define ExpandCaseToIntTypes(Instr) \ - ExpandCaseTo32bitIntTypes(Instr) - -#define ExpandCaseToIntTruncTypes(Instr) \ - ExpandCaseTo32bitIntTruncTypes(Instr) - -#define ExpandCaseToFloatTypes(Instr) \ - case Instr##_f32: - -#define ExpandCaseTo32bitScalarTypes(Instr) \ - ExpandCaseTo32bitIntTypes(Instr) \ -case Instr##_f32: - -#define ExpandCaseToAllScalarTypes(Instr) \ - ExpandCaseToFloatTypes(Instr) \ -ExpandCaseToIntTypes(Instr) - -#define ExpandCaseToAllScalarTruncTypes(Instr) \ - ExpandCaseToFloatTruncTypes(Instr) \ -ExpandCaseToIntTruncTypes(Instr) - -#define ExpandCaseToAllTypes(Instr) \ -ExpandCaseToAllScalarTypes(Instr) - -#define ExpandCaseToAllTruncTypes(Instr) \ -ExpandCaseToAllScalarTruncTypes(Instr) - -// Macros that expand into statements with return values -#define ExpandCaseTo32bitIntReturn(Instr, Return) \ -case Instr##_i32: return Return##_i32; - -#define ExpandCaseToIntReturn(Instr, Return) \ - ExpandCaseTo32bitIntReturn(Instr, Return) - -#define ExpandCaseToFloatReturn(Instr, Return) \ - case Instr##_f32: return Return##_f32;\ - -#define ExpandCaseToAllScalarReturn(Instr, Return) \ - ExpandCaseToFloatReturn(Instr, Return) \ -ExpandCaseToIntReturn(Instr, Return) - -// These macros expand to common groupings of RegClass ID's -#define ExpandCaseTo1CompRegID \ -case AMDGPU::GPRI32RegClassID: \ -case AMDGPU::GPRF32RegClassID: - -#define ExpandCaseTo32BitType(Instr) \ -case Instr##_i32: \ -case Instr##_f32: - -#endif // AMDILUTILITYFUNCTIONS_H_ diff --git a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp b/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp deleted file mode 100644 index b6ab9b22fb1..00000000000 --- a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp +++ /dev/null @@ -1,34 +0,0 @@ - -#include "AMDGPUInstPrinter.h" -#include "llvm/MC/MCInst.h" - -using namespace llvm; - -void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { - printInstruction(MI, OS); - - printAnnotation(OS, Annot); -} - -void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - O << getRegisterName(Op.getReg()); - } else if (Op.isImm()) { - O << Op.getImm(); - } else if (Op.isFPImm()) { - O << Op.getFPImm(); - } else { - assert(!"unknown operand type in printOperand"); - } -} - -void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printOperand(MI, OpNo, O); -} - -#include "AMDGPUGenAsmWriter.inc" diff --git a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h b/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h deleted file mode 100644 index 62c1a5ee04f..00000000000 --- a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h +++ /dev/null @@ -1,34 +0,0 @@ - -#ifndef AMDGPUINSTPRINTER_H -#define AMDGPUINSTPRINTER_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - -class AMDGPUInstPrinter : public MCInstPrinter { -public: - AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MII, MRI) {} - - //Autogenerated by tblgen - void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - -// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - -private: - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); -// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O); - void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - - -}; - -} // End namespace llvm - -#endif // AMDGPUINSTRPRINTER_H diff --git a/src/gallium/drivers/radeon/LICENSE.TXT b/src/gallium/drivers/radeon/LICENSE.TXT deleted file mode 100644 index a57de2e87a1..00000000000 --- a/src/gallium/drivers/radeon/LICENSE.TXT +++ /dev/null @@ -1,43 +0,0 @@ -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp deleted file mode 100644 index 5b31bc6c8f0..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp +++ /dev/null @@ -1,80 +0,0 @@ -//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -namespace { - -class AMDGPUMCObjectWriter : public MCObjectWriter { -public: - AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } - virtual void ExecutePostLayoutBinding(MCAssembler &Asm, - const MCAsmLayout &Layout) { - //XXX: Implement if necessary. - } - virtual void RecordRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - assert(!"Not implemented"); - } - - virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); - -}; - -class AMDGPUAsmBackend : public MCAsmBackend { -public: - AMDGPUAsmBackend(const Target &T) - : MCAsmBackend() {} - - virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const; - virtual unsigned getNumFixupKinds() const { return 0; }; - virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { assert(!"Not implemented"); } - virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, - const MCAsmLayout &Layout) const { - return false; - } - virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(!"Not implemented"); - } - virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; } - virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { - return true; - } -}; - -} //End anonymous namespace - -void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, - const MCAsmLayout &Layout) { - for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { - Asm.writeSectionData(I, Layout); - } -} - -MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) { - return new AMDGPUAsmBackend(T); -} - -AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter( - raw_ostream &OS) const { - return new AMDGPUMCObjectWriter(OS); -} diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp deleted file mode 100644 index ccd5a3bfaa9..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUMCAsmInfo.h" - -using namespace llvm; -AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() -{ - HasSingleParameterDotFile = false; - WeakDefDirective = 0; - //===------------------------------------------------------------------===// - HasSubsectionsViaSymbols = true; - HasMachoZeroFillDirective = false; - HasMachoTBSSDirective = false; - HasStaticCtorDtorReferenceInStaticMode = false; - LinkerRequiresNonEmptyDwarfLines = true; - MaxInstLength = 16; - PCSymbol = "$"; - SeparatorString = "\n"; - CommentColumn = 40; - CommentString = ";"; - LabelSuffix = ":"; - GlobalPrefix = "@"; - PrivateGlobalPrefix = ";."; - LinkerPrivateGlobalPrefix = "!"; - InlineAsmStart = ";#ASMSTART"; - InlineAsmEnd = ";#ASMEND"; - AssemblerDialect = 0; - AllowQuotesInName = false; - AllowNameToStartWithDigit = false; - AllowPeriodsInName = false; - - //===--- Data Emission Directives -------------------------------------===// - ZeroDirective = ".zero"; - AsciiDirective = ".ascii\t"; - AscizDirective = ".asciz\t"; - Data8bitsDirective = ".byte\t"; - Data16bitsDirective = ".short\t"; - Data32bitsDirective = ".long\t"; - Data64bitsDirective = ".quad\t"; - GPRel32Directive = 0; - SunStyleELFSectionSwitchSyntax = true; - UsesELFSectionDirectiveForBSS = true; - HasMicrosoftFastStdCallMangling = false; - - //===--- Alignment Information ----------------------------------------===// - AlignDirective = ".align\t"; - AlignmentIsInBytes = true; - TextAlignFillValue = 0; - - //===--- Global Variable Emission Directives --------------------------===// - GlobalDirective = ".global"; - ExternDirective = ".extern"; - HasSetDirective = false; - HasAggressiveSymbolFolding = true; - LCOMMDirectiveType = LCOMM::None; - COMMDirectiveAlignmentIsInBytes = false; - HasDotTypeDotSizeDirective = false; - HasNoDeadStrip = true; - HasSymbolResolver = false; - WeakRefDirective = ".weakref\t"; - LinkOnceDirective = 0; - //===--- Dwarf Emission Directives -----------------------------------===// - HasLEB128 = true; - SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::None; - DwarfUsesInlineInfoSection = false; - DwarfSectionOffsetDirective = ".offset"; - DwarfUsesLabelOffsetForRanges = true; - - //===--- CBE Asm Translation Table -----------------------------------===// - AsmTransCBE = 0; -} -const char* -AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const -{ - switch (AS) { - default: - return 0; - case 0: - return 0; - }; - return 0; -} - -const MCSection* -AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const -{ - return 0; -} diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h deleted file mode 100644 index 0ca264beb0f..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUMCASMINFO_H_ -#define AMDGPUMCASMINFO_H_ - -#include "llvm/MC/MCAsmInfo.h" -namespace llvm { - class Target; - class StringRef; - - class AMDGPUMCAsmInfo : public MCAsmInfo { - public: - explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT); - const char* - getDataASDirective(unsigned int Size, unsigned int AS) const; - const MCSection* getNonexecutableStackSection(MCContext &CTX) const; - }; -} // namespace llvm -#endif // AMDGPUMCASMINFO_H_ diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h deleted file mode 100644 index a75a8414e6c..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// CodeEmitter interface for R600 and SI codegen. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUCODEEMITTER_H -#define AMDGPUCODEEMITTER_H - -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - - class MCInst; - class MCOperand; - - class AMDGPUMCCodeEmitter : public MCCodeEmitter { - public: - - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups) const; - - virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - - virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const { - return Value; - } - virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - }; - -} // End namespace llvm - -#endif // AMDGPUCODEEMITTER_H diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp deleted file mode 100644 index 6de20fceda6..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides AMDGPU specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUMCTargetDesc.h" -#include "AMDGPUMCAsmInfo.h" -#include "InstPrinter/AMDGPUInstPrinter.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "AMDGPUGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "AMDGPUGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "AMDGPUGenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createAMDGPUMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitAMDGPUMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitAMDGPUMCRegisterInfo(X, 0); - return X; -} - -static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo * X = new MCSubtargetInfo(); - InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new AMDGPUInstPrinter(MAI, MII, MRI); -} - -static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { - return createSIMCCodeEmitter(MCII, STI, Ctx); - } else { - return createR600MCCodeEmitter(MCII, STI, Ctx); - } -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &_OS, - MCCodeEmitter *_Emitter, - bool RelaxAll, - bool NoExecStack) { - return createPureStreamer(Ctx, MAB, _OS, _Emitter); -} - -extern "C" void LLVMInitializeAMDGPUTargetMC() { - - RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget); - - TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo); - - TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo); - - TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo); - - TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo); - - TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter); - - TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter); - - TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend); - - TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer); -} diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h deleted file mode 100644 index 328e367401a..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h +++ /dev/null @@ -1,51 +0,0 @@ -//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides AMDGPU specific target descriptions. -// -//===----------------------------------------------------------------------===// -// - -#ifndef AMDGPUMCTARGETDESC_H -#define AMDGPUMCTARGETDESC_H - -#include "llvm/ADT/StringRef.h" - -namespace llvm { -class MCAsmBackend; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCRegisterInfo; -class MCSubtargetInfo; -class Target; - -extern Target TheAMDGPUTarget; - -MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT); -} // End llvm namespace - -#define GET_REGINFO_ENUM -#include "AMDGPUGenRegisterInfo.inc" - -#define GET_INSTRINFO_ENUM -#include "AMDGPUGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "AMDGPUGenSubtargetInfo.inc" - -#endif // AMDGPUMCTARGETDESC_H diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp deleted file mode 100644 index a11f48234cb..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp +++ /dev/null @@ -1,727 +0,0 @@ -//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This code emitters outputs bytecode that is understood by the r600g driver -// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, -// except that the size of the instruction fields are rounded up to the -// nearest byte. -// -// [1] http://www.mesa3d.org/ -// -//===----------------------------------------------------------------------===// - -#include "R600Defines.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/raw_ostream.h" - -#include <stdio.h> - -#define SRC_BYTE_COUNT 11 -#define DST_BYTE_COUNT 5 - -using namespace llvm; - -namespace { - -class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { - R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT - const MCInstrInfo &MCII; - const MCSubtargetInfo &STI; - MCContext &Ctx; - -public: - - R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : MCII(mcii), STI(sti), Ctx(ctx) { } - - /// EncodeInstruction - Encode the instruction and write it to the OS. - virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; - - /// getMachineOpValue - Reutrn the encoding for an MCOperand. - virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const; -private: - - void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; - void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, - raw_ostream &OS) const; - void EmitDst(const MCInst &MI, raw_ostream &OS) const; - void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; - void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; - - void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; - - void EmitByte(unsigned int byte, raw_ostream &OS) const; - - void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; - - void Emit(uint32_t value, raw_ostream &OS) const; - void Emit(uint64_t value, raw_ostream &OS) const; - - unsigned getHWRegIndex(unsigned reg) const; - unsigned getHWRegChan(unsigned reg) const; - unsigned getHWReg(unsigned regNo) const; - - bool isFCOp(unsigned opcode) const; - bool isTexOp(unsigned opcode) const; - bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; - - /// getHWRegIndexGen - Get the register's hardware index. Implemented in - /// R600HwRegInfo.include. - unsigned getHWRegIndexGen(unsigned int Reg) const; - - /// getHWRegChanGen - Get the register's channel. Implemented in - /// R600HwRegInfo.include. - unsigned getHWRegChanGen(unsigned int Reg) const; -}; - -} // End anonymous namespace - -enum RegElement { - ELEMENT_X = 0, - ELEMENT_Y, - ELEMENT_Z, - ELEMENT_W -}; - -enum InstrTypes { - INSTR_ALU = 0, - INSTR_TEX, - INSTR_FC, - INSTR_NATIVE, - INSTR_VTX -}; - -enum FCInstr { - FC_IF = 0, - FC_IF_INT, - FC_ELSE, - FC_ENDIF, - FC_BGNLOOP, - FC_ENDLOOP, - FC_BREAK, - FC_BREAK_NZ_INT, - FC_CONTINUE, - FC_BREAK_Z_INT, - FC_BREAK_NZ -}; - -enum TextureTypes { - TEXTURE_1D = 1, - TEXTURE_2D, - TEXTURE_3D, - TEXTURE_CUBE, - TEXTURE_RECT, - TEXTURE_SHADOW1D, - TEXTURE_SHADOW2D, - TEXTURE_SHADOWRECT, - TEXTURE_1D_ARRAY, - TEXTURE_2D_ARRAY, - TEXTURE_SHADOW1D_ARRAY, - TEXTURE_SHADOW2D_ARRAY -}; - -MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, STI, Ctx); -} - -void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - if (isTexOp(MI.getOpcode())) { - EmitTexInstr(MI, Fixups, OS); - } else if (isFCOp(MI.getOpcode())){ - EmitFCInstr(MI, OS); - } else if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::BUNDLE || - MI.getOpcode() == AMDGPU::KILL) { - return; - } else { - switch(MI.getOpcode()) { - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: - { - uint64_t inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_NATIVE, OS); - Emit(inst, OS); - break; - } - case AMDGPU::CONSTANT_LOAD_eg: - case AMDGPU::VTX_READ_PARAM_i32_eg: - case AMDGPU::VTX_READ_PARAM_f32_eg: - case AMDGPU::VTX_READ_GLOBAL_i8_eg: - case AMDGPU::VTX_READ_GLOBAL_i32_eg: - case AMDGPU::VTX_READ_GLOBAL_f32_eg: - case AMDGPU::VTX_READ_GLOBAL_v4i32_eg: - case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: - { - uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); - uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - - EmitByte(INSTR_VTX, OS); - Emit(InstWord01, OS); - Emit(InstWord2, OS); - break; - } - - default: - EmitALUInstr(MI, Fixups, OS); - break; - } - } -} - -void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned NumOperands = MI.getNumOperands(); - - if(MCDesc.findFirstPredOperandIdx() > -1) - NumOperands--; - - if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0) - NumOperands--; - - if(MI.getOpcode() == AMDGPU::PRED_X) - NumOperands = 2; - - // XXX Check if instruction writes a result - if (NumOperands < 1) { - return; - } - - // Emit instruction type - EmitByte(INSTR_ALU, OS); - - uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); - - //older alu have different encoding for instructions with one or two src - //parameters. - if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && - !(MCDesc.TSFlags & R600_InstFlag::OP3)) { - uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); - InstWord01 &= ~(0x3FFULL << 39); - InstWord01 |= ISAOpCode << 1; - } - - unsigned int OpIndex; - for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) { - // Literal constants are always stored as the last operand. - if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) { - break; - } - EmitSrcISA(MI, OpIndex, InstWord01, OS); - } - - // Emit zeros for unused sources - for ( ; OpIndex < 4; OpIndex++) { - EmitNullBytes(SRC_BYTE_COUNT - 6, OS); - } - - // Emit destination register - const MCOperand &dstOp = MI.getOperand(0); - if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) { - //element of destination register - InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61; - - // isClamped - if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) { - InstWord01 |= 1ULL << 63; - } - - // write mask - if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) { - InstWord01 |= 1ULL << 36; - } - - // XXX: Emit relative addressing mode - } - - // Emit ALU - - // Emit IsLast (for this instruction group) (1 byte) - if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) { - InstWord01 |= 1ULL << 31; - } - - // XXX: Emit push modifier - if(isFlagSet(MI, 1, MO_FLAG_PUSH)) { - InstWord01 |= 1ULL << 34; - } - - // XXX: Emit predicate (1 byte) - int PredIdx = MCDesc.findFirstPredOperandIdx(); - if (PredIdx != -1) { - switch(MI.getOperand(PredIdx).getReg()) { - case AMDGPU::PRED_SEL_ZERO: - InstWord01 |= 2ULL << 29; - break; - case AMDGPU::PRED_SEL_ONE: - InstWord01 |= 3ULL << 29; - break; - } - } - - //XXX: predicate - //XXX: bank swizzle - //XXX: OMOD - //XXX: index mode - - Emit(InstWord01, OS); -} - -void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, - raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); - union { - float f; - uint32_t i; - } Value; - Value.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. - if (MO.isReg()) { - unsigned reg = MO.getReg(); - EmitTwoBytes(getHWReg(reg), OS); - if (reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - Value.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - Value.i = ImmOp.getImm(); - } - } - } else { - // XXX: Handle other operand types. - EmitTwoBytes(0, OS); - } - - // Emit the source channel (1 byte) - if (MO.isReg()) { - EmitByte(getHWRegChan(MO.getReg()), OS); - } else { - EmitByte(0, OS); - } - - // XXX: Emit isNegated (1 byte) - if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) - && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || - (MO.isReg() && - (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - // Emit isAbsolute (1 byte) - if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - // XXX: Emit relative addressing mode (1 byte) - EmitByte(0, OS); - - // Emit kc_bank, This will be adjusted later by r600_asm - EmitByte(0, OS); - - // Emit the literal value, if applicable (4 bytes). - Emit(Value.i, OS); - -} - -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, - uint64_t &Value, raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); - union { - float f; - uint32_t i; - } InlineConstant; - InlineConstant.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - InlineConstant.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - InlineConstant.i = ImmOp.getImm(); - } - } - } else { - // XXX: Handle other operand types. - EmitTwoBytes(0, OS); - } - - // source channel - uint64_t sourceChannelValue = getHWRegChan(MO.getReg()); - if (OpIdx == 1) - Value |= sourceChannelValue << 10; - if (OpIdx == 2) - Value |= sourceChannelValue << 23; - if (OpIdx == 3) - Value |= sourceChannelValue << 42; - - // isNegated - if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) - && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || - (MO.isReg() && - (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ - if (OpIdx == 1) - Value |= 1ULL << 12; - else if (OpIdx == 2) - Value |= 1ULL << 25; - else if (OpIdx == 3) - Value |= 1ULL << 44; - } - - // isAbsolute - if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { - assert(OpIdx < 3); - Value |= 1ULL << (32+OpIdx-1); - } - - // XXX: relative addressing mode - // XXX: kc_bank - - // Emit the literal value, if applicable (4 bytes). - Emit(InlineConstant.i, OS); - -} - -void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - - unsigned opcode = MI.getOpcode(); - bool hasOffsets = (opcode == AMDGPU::TEX_LD); - unsigned op_offset = hasOffsets ? 3 : 0; - int64_t sampler = MI.getOperand(op_offset+2).getImm(); - int64_t textureType = MI.getOperand(op_offset+3).getImm(); - unsigned srcSelect[4] = {0, 1, 2, 3}; - - // Emit instruction type - EmitByte(1, OS); - - // Emit instruction - EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); - - // XXX: Emit resource id r600_shader.c uses sampler + 1. Why? - EmitByte(sampler + 1 + 1, OS); - - // Emit source register - EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); - - // XXX: Emit src isRelativeAddress - EmitByte(0, OS); - - // Emit destination register - EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); - - // XXX: Emit dst isRealtiveAddress - EmitByte(0, OS); - - // XXX: Emit dst select - EmitByte(0, OS); // X - EmitByte(1, OS); // Y - EmitByte(2, OS); // Z - EmitByte(3, OS); // W - - // XXX: Emit lod bias - EmitByte(0, OS); - - // XXX: Emit coord types - unsigned coordType[4] = {1, 1, 1, 1}; - - if (textureType == TEXTURE_RECT - || textureType == TEXTURE_SHADOWRECT) { - coordType[ELEMENT_X] = 0; - coordType[ELEMENT_Y] = 0; - } - - if (textureType == TEXTURE_1D_ARRAY - || textureType == TEXTURE_SHADOW1D_ARRAY) { - if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) { - coordType[ELEMENT_Y] = 0; - } else { - coordType[ELEMENT_Z] = 0; - srcSelect[ELEMENT_Z] = ELEMENT_Y; - } - } else if (textureType == TEXTURE_2D_ARRAY - || textureType == TEXTURE_SHADOW2D_ARRAY) { - coordType[ELEMENT_Z] = 0; - } - - for (unsigned i = 0; i < 4; i++) { - EmitByte(coordType[i], OS); - } - - // XXX: Emit offsets - if (hasOffsets) - for (unsigned i = 2; i < 5; i++) - EmitByte(MI.getOperand(i).getImm()<<1, OS); - else - EmitNullBytes(3, OS); - - // Emit sampler id - EmitByte(sampler, OS); - - // XXX:Emit source select - if ((textureType == TEXTURE_SHADOW1D - || textureType == TEXTURE_SHADOW2D - || textureType == TEXTURE_SHADOWRECT - || textureType == TEXTURE_SHADOW1D_ARRAY) - && opcode != AMDGPU::TEX_SAMPLE_C_L - && opcode != AMDGPU::TEX_SAMPLE_C_LB) { - srcSelect[ELEMENT_W] = ELEMENT_Z; - } - - for (unsigned i = 0; i < 4; i++) { - EmitByte(srcSelect[i], OS); - } -} - -void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { - - // Emit instruction type - EmitByte(INSTR_FC, OS); - - // Emit SRC - unsigned NumOperands = MI.getNumOperands(); - if (NumOperands > 0) { - assert(NumOperands == 1); - EmitSrc(MI, 0, OS); - } else { - EmitNullBytes(SRC_BYTE_COUNT, OS); - } - - // Emit FC Instruction - enum FCInstr instr; - switch (MI.getOpcode()) { - case AMDGPU::BREAK_LOGICALZ_f32: - instr = FC_BREAK; - break; - case AMDGPU::BREAK_LOGICALNZ_f32: - instr = FC_BREAK_NZ; - break; - case AMDGPU::BREAK_LOGICALNZ_i32: - instr = FC_BREAK_NZ_INT; - break; - case AMDGPU::BREAK_LOGICALZ_i32: - instr = FC_BREAK_Z_INT; - break; - case AMDGPU::CONTINUE_LOGICALNZ_f32: - case AMDGPU::CONTINUE_LOGICALNZ_i32: - instr = FC_CONTINUE; - break; - case AMDGPU::IF_LOGICALNZ_f32: - instr = FC_IF; - case AMDGPU::IF_LOGICALNZ_i32: - instr = FC_IF_INT; - break; - case AMDGPU::IF_LOGICALZ_f32: - abort(); - break; - case AMDGPU::ELSE: - instr = FC_ELSE; - break; - case AMDGPU::ENDIF: - instr = FC_ENDIF; - break; - case AMDGPU::ENDLOOP: - instr = FC_ENDLOOP; - break; - case AMDGPU::WHILELOOP: - instr = FC_BGNLOOP; - break; - default: - abort(); - break; - } - EmitByte(instr, OS); -} - -void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, - raw_ostream &OS) const { - - for (unsigned int i = 0; i < ByteCount; i++) { - EmitByte(0, OS); - } -} - -void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { - OS.write((uint8_t) Byte & 0xff); -} - -void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, - raw_ostream &OS) const { - OS.write((uint8_t) (Bytes & 0xff)); - OS.write((uint8_t) ((Bytes >> 8) & 0xff)); -} - -void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 4; i++) { - OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); - } -} - -void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 8; i++) { - EmitByte((Value >> (8 * i)) & 0xff, OS); - } -} - -unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const { - switch(reg) { - case AMDGPU::ZERO: return 248; - case AMDGPU::ONE: - case AMDGPU::NEG_ONE: return 249; - case AMDGPU::ONE_INT: return 250; - case AMDGPU::HALF: - case AMDGPU::NEG_HALF: return 252; - case AMDGPU::ALU_LITERAL_X: return 253; - case AMDGPU::PREDICATE_BIT: - case AMDGPU::PRED_SEL_OFF: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PRED_SEL_ONE: - return 0; - default: return getHWRegIndexGen(reg); - } -} - -unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { - switch(reg) { - case AMDGPU::ZERO: - case AMDGPU::ONE: - case AMDGPU::ONE_INT: - case AMDGPU::NEG_ONE: - case AMDGPU::HALF: - case AMDGPU::NEG_HALF: - case AMDGPU::ALU_LITERAL_X: - case AMDGPU::PREDICATE_BIT: - case AMDGPU::PRED_SEL_OFF: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PRED_SEL_ONE: - return 0; - default: return getHWRegChanGen(reg); - } -} -unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { - unsigned HWReg; - - HWReg = getHWRegIndex(RegNo); - if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) { - HWReg += 512; - } - return HWReg; -} - -uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, - const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixup) const { - if (MO.isReg()) { - return getHWRegIndex(MO.getReg()); - } else if (MO.isImm()) { - return MO.getImm(); - } else { - assert(0); - return 0; - } -} - -//===----------------------------------------------------------------------===// -// Encoding helper functions -//===----------------------------------------------------------------------===// - -bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { - switch(opcode) { - default: return false; - case AMDGPU::BREAK_LOGICALZ_f32: - case AMDGPU::BREAK_LOGICALNZ_i32: - case AMDGPU::BREAK_LOGICALZ_i32: - case AMDGPU::BREAK_LOGICALNZ_f32: - case AMDGPU::CONTINUE_LOGICALNZ_f32: - case AMDGPU::IF_LOGICALNZ_i32: - case AMDGPU::IF_LOGICALZ_f32: - case AMDGPU::ELSE: - case AMDGPU::ENDIF: - case AMDGPU::ENDLOOP: - case AMDGPU::IF_LOGICALNZ_f32: - case AMDGPU::WHILELOOP: - return true; - } -} - -bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { - switch(opcode) { - default: return false; - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - return true; - } -} - -bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, - unsigned Flag) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); - if (FlagIndex == 0) { - return false; - } - assert(MI.getOperand(FlagIndex).isImm()); - return !!((MI.getOperand(FlagIndex).getImm() >> - (NUM_MO_FLAGS * Operand)) & Flag); -} -#define R600RegisterInfo R600MCCodeEmitter -#include "R600HwRegInfo.include" -#undef R600RegisterInfo - -#include "AMDGPUGenMCCodeEmitter.inc" diff --git a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp deleted file mode 100644 index ca4b579dcce..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp +++ /dev/null @@ -1,296 +0,0 @@ -//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The SI code emitter produces machine code that can be executed directly on -// the GPU device. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/raw_ostream.h" - -#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1)) -#define SI_INSTR_FLAGS_ENCODING_MASK 0xf - -// These must be kept in sync with SIInstructions.td and also the -// InstrEncodingInfo array in SIInstrInfo.cpp. -// -// NOTE: This enum is only used to identify the encoding type within LLVM, -// the actual encoding type that is part of the instruction format is different -namespace SIInstrEncodingType { - enum Encoding { - EXP = 0, - LDS = 1, - MIMG = 2, - MTBUF = 3, - MUBUF = 4, - SMRD = 5, - SOP1 = 6, - SOP2 = 7, - SOPC = 8, - SOPK = 9, - SOPP = 10, - VINTRP = 11, - VOP1 = 12, - VOP2 = 13, - VOP3 = 14, - VOPC = 15 - }; -} - -using namespace llvm; - -namespace { -class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { - SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT - const MCInstrInfo &MCII; - const MCSubtargetInfo &STI; - MCContext &Ctx; - -public: - SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : MCII(mcii), STI(sti), Ctx(ctx) { } - - ~SIMCCodeEmitter() { } - - /// EncodeInstruction - Encode the instruction and write it to the OS. - virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; - - /// getMachineOpValue - Reutrn the encoding for an MCOperand. - virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const; - -public: - - /// GPRAlign - Encode a sequence of registers with the correct alignment. - unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; - - /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used - virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const; - - /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used - virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const; - - /// SMRDmemriEncode - Encoding for SMRD indexed loads - virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const; - - /// VOPPostEncode - Post-Encoder method for VOP instructions - virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const; - -private: - - ///getEncodingType = Return this SIInstrEncodingType for this instruction. - unsigned getEncodingType(const MCInst &MI) const; - - ///getEncodingBytes - Get then size in bytes of this instructions encoding. - unsigned getEncodingBytes(const MCInst &MI) const; - - /// getRegBinaryCode - Returns the hardware encoding for a register - unsigned getRegBinaryCode(unsigned reg) const; - - /// getHWRegNum - Generated function that returns the hardware encoding for - /// a register - unsigned getHWRegNum(unsigned reg) const; - -}; - -} // End anonymous namespace - -MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new SIMCCodeEmitter(MCII, STI, Ctx); -} - -void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups); - unsigned bytes = getEncodingBytes(MI); - for (unsigned i = 0; i < bytes; i++) { - OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); - } -} - -uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, - const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const { - if (MO.isReg()) { - return getRegBinaryCode(MO.getReg()); - } else if (MO.isImm()) { - return MO.getImm(); - } else if (MO.isFPImm()) { - // XXX: Not all instructions can use inline literals - // XXX: We should make sure this is a 32-bit constant - union { - float F; - uint32_t I; - } Imm; - Imm.F = MO.getFPImm(); - return Imm.I; - } else{ - llvm_unreachable("Encoding of this operand type is not supported yet."); - } - return 0; -} - -//===----------------------------------------------------------------------===// -// Custom Operand Encodings -//===----------------------------------------------------------------------===// - -unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, - unsigned shift) const { - unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg()); - return regCode >> shift; - return 0; -} -unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, - unsigned OpNo , - SmallVectorImpl<MCFixup> &Fixup) const { - return GPRAlign(MI, OpNo, 1); -} - -unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, - unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const { - return GPRAlign(MI, OpNo, 2); -} - -#define SMRD_OFFSET_MASK 0xff -#define SMRD_IMM_SHIFT 8 -#define SMRD_SBASE_MASK 0x3f -#define SMRD_SBASE_SHIFT 9 -/// SMRDmemriEncode - This function is responsibe for encoding the offset -/// and the base ptr for SMRD instructions it should return a bit string in -/// this format: -/// -/// OFFSET = bits{7-0} -/// IMM = bits{8} -/// SBASE = bits{14-9} -/// -uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const { - uint32_t Encoding; - - const MCOperand &OffsetOp = MI.getOperand(OpNo + 1); - - //XXX: Use this function for SMRD loads with register offsets - assert(OffsetOp.isImm()); - - Encoding = - (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK) - | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit - | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) - ; - - return Encoding; -} - -//===----------------------------------------------------------------------===// -// Post Encoder Callbacks -//===----------------------------------------------------------------------===// - -uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{ - unsigned encodingType = getEncodingType(MI); - unsigned numSrcOps; - unsigned vgprBitOffset; - - if (encodingType == SIInstrEncodingType::VOP3) { - numSrcOps = 3; - vgprBitOffset = 32; - } else { - numSrcOps = 1; - vgprBitOffset = 0; - } - - // Add one to skip over the destination reg operand. - for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) { - const MCOperand &MO = MI.getOperand(opIdx); - if (MO.isReg()) { - unsigned reg = MI.getOperand(opIdx).getReg(); - if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) || - AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) { - Value |= (VGPR_BIT(opIdx)) << vgprBitOffset; - } - } else if (MO.isFPImm()) { - union { - float f; - uint32_t i; - } Imm; - // XXX: Not all instructions can use inline literals - // XXX: We should make sure this is a 32-bit constant - Imm.f = MO.getFPImm(); - Value |= ((uint64_t)Imm.i) << 32; - } - } - return Value; -} - -//===----------------------------------------------------------------------===// -// Encoding helper functions -//===----------------------------------------------------------------------===// - -unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const { - return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK; -} - -unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const { - - // These instructions aren't real instructions with an encoding type, so - // we need to manually specify their size. - switch (MI.getOpcode()) { - default: break; - case AMDGPU::SI_LOAD_LITERAL_I32: - case AMDGPU::SI_LOAD_LITERAL_F32: - return 4; - } - - unsigned encoding_type = getEncodingType(MI); - switch (encoding_type) { - case SIInstrEncodingType::EXP: - case SIInstrEncodingType::LDS: - case SIInstrEncodingType::MUBUF: - case SIInstrEncodingType::MTBUF: - case SIInstrEncodingType::MIMG: - case SIInstrEncodingType::VOP3: - return 8; - default: - return 4; - } -} - - -unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const { - switch (reg) { - case AMDGPU::VCC: return 106; - case AMDGPU::M0: return 124; - case AMDGPU::EXEC: return 126; - case AMDGPU::EXEC_LO: return 126; - case AMDGPU::EXEC_HI: return 127; - case AMDGPU::SREG_LIT_0: return 128; - case AMDGPU::SI_LITERAL_CONSTANT: return 255; - default: return getHWRegNum(reg); - } -} - -#define SIRegisterInfo SIMCCodeEmitter -#include "SIRegisterGetHWRegNum.inc" -#undef SIRegisterInfo diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile index 05dc518a9aa..7934c712195 100644 --- a/src/gallium/drivers/radeon/Makefile +++ b/src/gallium/drivers/radeon/Makefile @@ -8,74 +8,8 @@ LIBNAME = radeon LIBRARY_INCLUDES = -I$(TOP)/include -TBLGEN = $(LLVM_BINDIR)/llvm-tblgen - CXXFLAGS+= $(LLVM_CXXFLAGS) -ifeq ($(LLVM_VERSION),3.1) - CPP_SOURCES += $(LLVM_CPP_SOURCES) - GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES) -else - CXXFLAGS+= -DEXTERNAL_LLVM -endif - include ../../Makefile.template CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS)) - -tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3 - -HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td) - -SIRegisterInfo.td: SIGenRegisterInfo.pl - $(PERL) $^ > $@ - -SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl - $(PERL) $^ $@ > /dev/null - -R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td -ifeq ($(HAVE_LLVM_INTRINSICS),) - cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td -else - cp R600IntrinsicsOpenCL.td R600Intrinsics.td -endif - -R600RegisterInfo.td: R600GenRegisterInfo.pl - $(PERL) $^ > $@ - -AMDGPUGenRegisterInfo.inc: $(TD_FILES) - $(call tablegen, -gen-register-info, AMDGPU.td, $@) - -AMDGPUGenInstrInfo.inc: $(TD_FILES) - $(call tablegen, -gen-instr-info, AMDGPU.td, $@) - -AMDGPUGenAsmWriter.inc: $(TD_FILES) - $(call tablegen, -gen-asm-writer, AMDGPU.td, $@) - -AMDGPUGenDAGISel.inc: $(TD_FILES) - $(call tablegen, -gen-dag-isel, AMDGPU.td, $@) - -AMDGPUGenCallingConv.inc: $(TD_FILES) - $(call tablegen, -gen-callingconv, AMDGPU.td, $@) - -AMDGPUGenSubtargetInfo.inc: $(TD_FILES) - $(call tablegen, -gen-subtarget, AMDGPU.td, $@) - -AMDGPUGenEDInfo.inc: $(TD_FILES) - $(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@) - -AMDGPUGenIntrinsics.inc: $(TD_FILES) - $(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@) - -AMDGPUGenCodeEmitter.inc: $(TD_FILES) - $(call tablegen, -gen-emitter, AMDGPU.td, $@) - -AMDGPUGenMCCodeEmitter.inc: $(TD_FILES) - $(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@) - -AMDGPUGenDFAPacketizer.inc: $(TD_FILES) - $(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@) - -LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser) -loader: loader.o libradeon.a - gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 5e793422d66..45d2e8f2e76 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -1,86 +1,3 @@ - -TD_FILES := \ - AMDGPU.td \ - AMDGPUInstrInfo.td \ - AMDGPUInstructions.td \ - AMDGPUIntrinsics.td \ - AMDGPURegisterInfo.td \ - AMDILBase.td \ - AMDILInstrInfo.td \ - AMDILIntrinsics.td \ - AMDILRegisterInfo.td \ - Processors.td \ - R600Instructions.td \ - R600Intrinsics.td \ - R600IntrinsicsNoOpenCL.td \ - R600IntrinsicsOpenCL.td \ - R600RegisterInfo.td \ - R600Schedule.td \ - SIInstrFormats.td \ - SIInstrInfo.td \ - SIInstructions.td \ - SIIntrinsics.td \ - SIRegisterInfo.td \ - SISchedule.td - -LLVM_GENERATED_SOURCES := \ - R600Intrinsics.td \ - R600RegisterInfo.td \ - SIRegisterInfo.td \ - SIRegisterGetHWRegNum.inc \ - AMDGPUGenRegisterInfo.inc \ - AMDGPUGenInstrInfo.inc \ - AMDGPUGenAsmWriter.inc \ - AMDGPUGenDAGISel.inc \ - AMDGPUGenCallingConv.inc \ - AMDGPUGenSubtargetInfo.inc \ - AMDGPUGenEDInfo.inc \ - AMDGPUGenIntrinsics.inc \ - AMDGPUGenCodeEmitter.inc \ - AMDGPUGenMCCodeEmitter.inc \ - AMDGPUGenDFAPacketizer.inc - -LLVM_CPP_SOURCES := \ - AMDIL7XXDevice.cpp \ - AMDILCFGStructurizer.cpp \ - AMDILDevice.cpp \ - AMDILDeviceInfo.cpp \ - AMDILEvergreenDevice.cpp \ - AMDILFrameLowering.cpp \ - AMDILIntrinsicInfo.cpp \ - AMDILISelDAGToDAG.cpp \ - AMDILISelLowering.cpp \ - AMDILNIDevice.cpp \ - AMDILPeepholeOptimizer.cpp \ - AMDILSIDevice.cpp \ - AMDGPUAsmPrinter.cpp \ - AMDGPUMCInstLower.cpp \ - AMDGPUSubtarget.cpp \ - AMDGPUTargetMachine.cpp \ - AMDGPUISelLowering.cpp \ - AMDGPUConvertToISA.cpp \ - AMDGPUInstrInfo.cpp \ - AMDGPURegisterInfo.cpp \ - R600ExpandSpecialInstrs.cpp \ - R600ISelLowering.cpp \ - R600InstrInfo.cpp \ - R600MachineFunctionInfo.cpp \ - R600RegisterInfo.cpp \ - SIAssignInterpRegs.cpp \ - SIInstrInfo.cpp \ - SIISelLowering.cpp \ - SILowerLiteralConstants.cpp \ - SILowerFlowControl.cpp \ - SIMachineFunctionInfo.cpp \ - SIRegisterInfo.cpp \ - InstPrinter/AMDGPUInstPrinter.cpp \ - MCTargetDesc/AMDGPUMCAsmInfo.cpp \ - MCTargetDesc/AMDGPUAsmBackend.cpp \ - MCTargetDesc/AMDGPUMCTargetDesc.cpp \ - MCTargetDesc/SIMCCodeEmitter.cpp \ - MCTargetDesc/R600MCCodeEmitter.cpp \ - TargetInfo/AMDGPUTargetInfo.cpp \ - CPP_SOURCES := \ radeon_llvm_emit.cpp diff --git a/src/gallium/drivers/radeon/Processors.td b/src/gallium/drivers/radeon/Processors.td deleted file mode 100644 index 3469f828fc0..00000000000 --- a/src/gallium/drivers/radeon/Processors.td +++ /dev/null @@ -1,29 +0,0 @@ -//===-- Processors.td - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// AMDIL processors supported. -// -//===----------------------------------------------------------------------===// - -class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> -: Processor<Name, itin, Features>; -def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"rv710", R600_EG_Itin, []>; -def : Proc<"rv730", R600_EG_Itin, []>; -def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; -def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"SI", SI_Itin, [Feature64BitPtr]>; - diff --git a/src/gallium/drivers/radeon/R600Defines.h b/src/gallium/drivers/radeon/R600Defines.h deleted file mode 100644 index 20c357cc15f..00000000000 --- a/src/gallium/drivers/radeon/R600Defines.h +++ /dev/null @@ -1,35 +0,0 @@ -//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - - -// Operand Flags -#define MO_FLAG_CLAMP (1 << 0) -#define MO_FLAG_NEG (1 << 1) -#define MO_FLAG_ABS (1 << 2) -#define MO_FLAG_MASK (1 << 3) -#define MO_FLAG_PUSH (1 << 4) -#define MO_FLAG_NOT_LAST (1 << 5) -#define NUM_MO_FLAGS 6 - -// Helper for finding getting the operand index for the instruction flags -// operand. -#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3) - -namespace R600_InstFlag { - enum TIF { - TRANS_ONLY = (1 << 0), - TEX = (1 << 1), - REDUCTION = (1 << 2), - FC = (1 << 3), - TRIG = (1 << 4), - OP3 = (1 << 5), - VECTOR = (1 << 6) - //FlagOperand bits 7, 8 - }; -} diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp deleted file mode 100644 index d6184e55302..00000000000 --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp +++ /dev/null @@ -1,292 +0,0 @@ -//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Vector, Reduction, and Cube instructions need to fill the entire instruction -// group to work correctly. This pass expands these individual instructions -// into several instructions that will completely fill the instruction group. -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "R600Defines.h" -#include "R600InstrInfo.h" -#include "R600RegisterInfo.h" -#include "R600MachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -class R600ExpandSpecialInstrsPass : public MachineFunctionPass { - -private: - static char ID; - const R600InstrInfo *TII; - - bool ExpandInputPerspective(MachineInstr& MI); - bool ExpandInputConstant(MachineInstr& MI); - -public: - R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "R600 Expand special instructions pass"; - } -}; - -} // End anonymous namespace - -char R600ExpandSpecialInstrsPass::ID = 0; - -FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { - return new R600ExpandSpecialInstrsPass(TM); -} - -bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) -{ - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - if (MI.getOpcode() != AMDGPU::input_perspective) - return false; - - MachineBasicBlock::iterator I = &MI; - unsigned DstReg = MI.getOperand(0).getReg(); - R600MachineFunctionInfo *MFI = MI.getParent()->getParent() - ->getInfo<R600MachineFunctionInfo>(); - unsigned IJIndexBase; - - // In Evergreen ISA doc section 8.3.2 : - // We need to interpolate XY and ZW in two different instruction groups. - // An INTERP_* must occupy all 4 slots of an instruction group. - // Output of INTERP_XY is written in X,Y slots - // Output of INTERP_ZW is written in Z,W slots - // - // Thus interpolation requires the following sequences : - // - // AnyGPR.x = INTERP_ZW; (Write Masked Out) - // AnyGPR.y = INTERP_ZW; (Write Masked Out) - // DstGPR.z = INTERP_ZW; - // DstGPR.w = INTERP_ZW; (End of first IG) - // DstGPR.x = INTERP_XY; - // DstGPR.y = INTERP_XY; - // AnyGPR.z = INTERP_XY; (Write Masked Out) - // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) - // - switch (MI.getOperand(1).getImm()) { - case 0: - IJIndexBase = MFI->GetIJPerspectiveIndex(); - break; - case 1: - IJIndexBase = MFI->GetIJLinearIndex(); - break; - default: - assert(0 && "Unknow ij index"); - } - - for (unsigned i = 0; i < 8; i++) { - unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( - 2 * IJIndexBase + ((i + 1) % 2)); - unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( - 4 * MI.getOperand(2).getImm()); - - unsigned Sel; - switch (i % 4) { - case 0:Sel = AMDGPU::sel_x;break; - case 1:Sel = AMDGPU::sel_y;break; - case 2:Sel = AMDGPU::sel_z;break; - case 3:Sel = AMDGPU::sel_w;break; - default:break; - } - - unsigned Res = TRI.getSubReg(DstReg, Sel); - - const MCInstrDesc &Opcode = (i < 4)? - TII->get(AMDGPU::INTERP_ZW): - TII->get(AMDGPU::INTERP_XY); - - MachineInstr *NewMI = BuildMI(*(MI.getParent()), - I, MI.getParent()->findDebugLoc(I), - Opcode, Res) - .addReg(IJIndex) - .addReg(ReadReg) - .addImm(0); - - if (!(i> 1 && i < 6)) { - TII->addFlag(NewMI, 0, MO_FLAG_MASK); - } - - if (i % 4 != 3) - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); - } - - MI.eraseFromParent(); - - return true; -} - -bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) -{ - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - if (MI.getOpcode() != AMDGPU::input_constant) - return false; - - MachineBasicBlock::iterator I = &MI; - unsigned DstReg = MI.getOperand(0).getReg(); - - for (unsigned i = 0; i < 4; i++) { - unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( - 4 * MI.getOperand(1).getImm() + i); - - unsigned Sel; - switch (i % 4) { - case 0:Sel = AMDGPU::sel_x;break; - case 1:Sel = AMDGPU::sel_y;break; - case 2:Sel = AMDGPU::sel_z;break; - case 3:Sel = AMDGPU::sel_w;break; - default:break; - } - - unsigned Res = TRI.getSubReg(DstReg, Sel); - - MachineInstr *NewMI = BuildMI(*(MI.getParent()), - I, MI.getParent()->findDebugLoc(I), - TII->get(AMDGPU::INTERP_LOAD_P0), Res) - .addReg(ReadReg) - .addImm(0); - - if (i % 4 != 3) - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); - } - - MI.eraseFromParent(); - - return true; -} - -bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { - - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - MachineBasicBlock::iterator I = MBB.begin(); - while (I != MBB.end()) { - MachineInstr &MI = *I; - I = llvm::next(I); - - if (ExpandInputPerspective(MI)) - continue; - if (ExpandInputConstant(MI)) - continue; - - bool IsReduction = TII->isReductionOp(MI.getOpcode()); - bool IsVector = TII->isVector(MI); - bool IsCube = TII->isCubeOp(MI.getOpcode()); - if (!IsReduction && !IsVector && !IsCube) { - continue; - } - - // Expand the instruction - // - // Reduction instructions: - // T0_X = DP4 T1_XYZW, T2_XYZW - // becomes: - // TO_X = DP4 T1_X, T2_X - // TO_Y (write masked) = DP4 T1_Y, T2_Y - // TO_Z (write masked) = DP4 T1_Z, T2_Z - // TO_W (write masked) = DP4 T1_W, T2_W - // - // Vector instructions: - // T0_X = MULLO_INT T1_X, T2_X - // becomes: - // T0_X = MULLO_INT T1_X, T2_X - // T0_Y (write masked) = MULLO_INT T1_X, T2_X - // T0_Z (write masked) = MULLO_INT T1_X, T2_X - // T0_W (write masked) = MULLO_INT T1_X, T2_X - // - // Cube instructions: - // T0_XYZW = CUBE T1_XYZW - // becomes: - // TO_X = CUBE T1_Z, T1_Y - // T0_Y = CUBE T1_Z, T1_X - // T0_Z = CUBE T1_X, T1_Z - // T0_W = CUBE T1_Y, T1_Z - for (unsigned Chan = 0; Chan < 4; Chan++) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Src0 = MI.getOperand(1).getReg(); - unsigned Src1 = 0; - - // Determine the correct source registers - if (!IsCube) { - Src1 = MI.getOperand(2).getReg(); - } - if (IsReduction) { - unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); - Src0 = TRI.getSubReg(Src0, SubRegIndex); - Src1 = TRI.getSubReg(Src1, SubRegIndex); - } else if (IsCube) { - static const int CubeSrcSwz[] = {2, 2, 0, 1}; - unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); - unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); - Src1 = TRI.getSubReg(Src0, SubRegIndex1); - Src0 = TRI.getSubReg(Src0, SubRegIndex0); - } - - // Determine the correct destination registers; - unsigned Flags = 0; - if (IsCube) { - unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); - DstReg = TRI.getSubReg(DstReg, SubRegIndex); - } else { - // Mask the write if the original instruction does not write to - // the current Channel. - Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); - unsigned DstBase = TRI.getHWRegIndex(DstReg); - DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); - } - - // Set the IsLast bit - Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0); - - // Add the new instruction - unsigned Opcode; - if (IsCube) { - switch (MI.getOpcode()) { - case AMDGPU::CUBE_r600_pseudo: - Opcode = AMDGPU::CUBE_r600_real; - break; - case AMDGPU::CUBE_eg_pseudo: - Opcode = AMDGPU::CUBE_eg_real; - break; - default: - assert(!"Unknown CUBE instruction"); - Opcode = 0; - break; - } - } else { - Opcode = MI.getOpcode(); - } - MachineInstr *NewMI = - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) - .addReg(Src0) - .addReg(Src1) - .addImm(0); // Flag - - NewMI->setIsInsideBundle(Chan != 0); - TII->addFlag(NewMI, 0, Flags); - } - MI.eraseFromParent(); - } - } - return false; -} diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl deleted file mode 100644 index c0a05f54cae..00000000000 --- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl +++ /dev/null @@ -1,206 +0,0 @@ -#===-- R600GenRegisterInfo.pl - Script for generating register info files --===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# -# -# This perl script prints to stdout .td code to be used as R600RegisterInfo.td -# it also generates a file called R600HwRegInfo.include, which contains helper -# functions for determining the hw encoding of registers. -# -#===------------------------------------------------------------------------===# - -use strict; -use warnings; - -use constant CONST_REG_COUNT => 512; -use constant TEMP_REG_COUNT => 128; - -my $CREG_MAX = CONST_REG_COUNT - 1; -my $TREG_MAX = TEMP_REG_COUNT - 1; - -print <<STRING; - -class R600Reg <string name> : Register<name> { - let Namespace = "AMDGPU"; -} - -class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { - let Namespace = "AMDGPU"; - let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; -} - -STRING - -my $i; - -### REG DEFS ### - -my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C"); -my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T"); - -my @t128reg; -my @treg_x; -for (my $i = 0; $i < TEMP_REG_COUNT; $i++) { - my $name = "T$i\_XYZW"; - print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n}; - $t128reg[$i] = $name; - $treg_x[$i] = "T$i\_X"; -} - -my $treg_string = join(",", @treg_list); -my $creg_list = join(",", @creg_list); -my $t128_string = join(",", @t128reg); -my $treg_x_string = join(",", @treg_x); -print <<STRING; - -class RegSet <dag s> { - dag set = s; -} - -def ZERO : R600Reg<"0.0">; -def HALF : R600Reg<"0.5">; -def ONE : R600Reg<"1.0">; -def ONE_INT : R600Reg<"1">; -def NEG_HALF : R600Reg<"-0.5">; -def NEG_ONE : R600Reg<"-1.0">; -def PV_X : R600Reg<"pv.x">; -def ALU_LITERAL_X : R600Reg<"literal.x">; -def PREDICATE_BIT : R600Reg<"PredicateBit">; -def PRED_SEL_OFF: R600Reg<"Pred_sel_off">; -def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">; -def PRED_SEL_ONE : R600Reg<"Pred_sel_one">; - -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add - $creg_list)>; - -def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add - $treg_string)>; - -def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add - $treg_x_string)>; - -def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add - R600_TReg32, - R600_CReg32, - ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; - -def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add - PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; - -def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add - PREDICATE_BIT)>; - -def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add - $t128_string)> -{ - let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)]; - let CopyCost = -1; -} - -STRING - -my %index_map; -my %chan_map; - -for ($i = 0; $i <= $#creg_list; $i++) { - push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]); - push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]); -} - -for ($i = 0; $i <= $#treg_list; $i++) { - push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]); - push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]); -} - -for ($i = 0; $i <= $#t128reg; $i++) { - push(@{$index_map{$i}}, $t128reg[$i]); - push(@{$chan_map{'X'}}, $t128reg[$i]); -} - -open(OUTFILE, ">", "R600HwRegInfo.include"); - -print OUTFILE <<STRING; - -unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const -{ - switch(reg) { - default: assert(!"Unknown register"); return 0; -STRING -foreach my $key (keys(%index_map)) { - foreach my $reg (@{$index_map{$key}}) { - print OUTFILE " case AMDGPU::$reg:\n"; - } - print OUTFILE " return $key;\n\n"; -} - -print OUTFILE " }\n}\n\n"; - -print OUTFILE <<STRING; - -unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const -{ - switch(reg) { - default: assert(!"Unknown register"); return 0; -STRING - -foreach my $key (keys(%chan_map)) { - foreach my $reg (@{$chan_map{$key}}) { - print OUTFILE " case AMDGPU::$reg:\n"; - } - my $val; - if ($key eq 'X') { - $val = 0; - } elsif ($key eq 'Y') { - $val = 1; - } elsif ($key eq 'Z') { - $val = 2; - } elsif ($key eq 'W') { - $val = 3; - } else { - die("Unknown chan value; $key"); - } - print OUTFILE " return $val;\n\n"; -} - -print OUTFILE " }\n}\n\n"; - -sub print_reg_defs { - my ($count, $prefix) = @_; - - my @reg_list; - - for ($i = 0; $i < $count; $i++) { - my $hw_index = get_hw_index($i); - my $chan= get_chan_str($i); - my $name = "$prefix$hw_index\_$chan"; - print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n}; - $reg_list[$i] = $name; - } - return @reg_list; -} - -#Helper functions -sub get_hw_index { - my ($index) = @_; - return int($index / 4); -} - -sub get_chan_str { - my ($index) = @_; - my $chan = $index % 4; - if ($chan == 0 ) { - return 'X'; - } elsif ($chan == 1) { - return 'Y'; - } elsif ($chan == 2) { - return 'Z'; - } elsif ($chan == 3) { - return 'W'; - } else { - die("Unknown chan value: $chan"); - } -} diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp deleted file mode 100644 index 5dd2f5334c5..00000000000 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ /dev/null @@ -1,740 +0,0 @@ -//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file -// is mostly EmitInstrWithCustomInserter(). -// -//===----------------------------------------------------------------------===// - -#include "R600ISelLowering.h" -#include "R600Defines.h" -#include "R600InstrInfo.h" -#include "R600MachineFunctionInfo.h" -#include "llvm/Argument.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" - -using namespace llvm; - -R600TargetLowering::R600TargetLowering(TargetMachine &TM) : - AMDGPUTargetLowering(TM), - TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) -{ - setOperationAction(ISD::MUL, MVT::i64, Expand); - addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); - addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); - addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); - computeRegisterProperties(); - - setOperationAction(ISD::FADD, MVT::v4f32, Expand); - setOperationAction(ISD::FMUL, MVT::v4f32, Expand); - - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - - setOperationAction(ISD::FSUB, MVT::f32, Expand); - - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); - - setOperationAction(ISD::ROTL, MVT::i32, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); - - setTargetDAGCombine(ISD::FP_ROUND); - - setSchedulingPreference(Sched::VLIW); -} - -MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( - MachineInstr * MI, MachineBasicBlock * BB) const -{ - MachineFunction * MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - MachineBasicBlock::iterator I = *MI; - - switch (MI->getOpcode()) { - default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - case AMDGPU::SHADER_TYPE: break; - case AMDGPU::CLAMP_R600: - { - MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(0) // Flags - .addReg(AMDGPU::PRED_SEL_OFF); - TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); - break; - } - case AMDGPU::FABS_R600: - { - MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(0) // Flags - .addReg(AMDGPU::PRED_SEL_OFF); - TII->addFlag(NewMI, 1, MO_FLAG_ABS); - break; - } - - case AMDGPU::FNEG_R600: - { - MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(0) // Flags - .addReg(AMDGPU::PRED_SEL_OFF); - TII->addFlag(NewMI, 1, MO_FLAG_NEG); - break; - } - - case AMDGPU::R600_LOAD_CONST: - { - int64_t RegIndex = MI->getOperand(1).getImm(); - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) - .addOperand(MI->getOperand(0)) - .addReg(ConstantReg); - break; - } - - case AMDGPU::MASK_WRITE: - { - unsigned maskedRegister = MI->getOperand(0).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); - MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); - TII->addFlag(defInstr, 0, MO_FLAG_MASK); - // Return early so the instruction is not erased - return BB; - } - - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: - { - // Convert to DWORD address - unsigned NewAddr = MRI.createVirtualRegister( - &AMDGPU::R600_TReg32_XRegClass); - unsigned ShiftValue = MRI.createVirtualRegister( - &AMDGPU::R600_TReg32RegClass); - unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; - - // XXX In theory, we should be able to pass ShiftValue directly to - // the LSHR_eg instruction as an inline literal, but I tried doing it - // this way and it didn't produce the correct results. - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32), - ShiftValue) - .addReg(AMDGPU::ALU_LITERAL_X) - .addReg(AMDGPU::PRED_SEL_OFF) - .addImm(2); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) - .addOperand(MI->getOperand(1)) - .addReg(ShiftValue) - .addReg(AMDGPU::PRED_SEL_OFF); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) - .addOperand(MI->getOperand(0)) - .addReg(NewAddr) - .addImm(EOP); // Set End of program bit - break; - } - - case AMDGPU::RESERVE_REG: - { - R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); - int64_t ReservedIndex = MI->getOperand(0).getImm(); - unsigned ReservedReg = - AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); - MFI->ReservedRegs.push_back(ReservedReg); - break; - } - - case AMDGPU::TXD: - { - unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); - unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); - - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) - .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) - .addOperand(MI->getOperand(2)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addReg(t0, RegState::Implicit) - .addReg(t1, RegState::Implicit); - break; - } - case AMDGPU::TXD_SHADOW: - { - unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); - unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); - - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) - .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) - .addOperand(MI->getOperand(2)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addReg(t0, RegState::Implicit) - .addReg(t1, RegState::Implicit); - break; - } - case AMDGPU::BRANCH: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) - .addOperand(MI->getOperand(0)) - .addReg(0); - break; - case AMDGPU::BRANCH_COND_f32: - { - MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) - .addReg(AMDGPU::PREDICATE_BIT) - .addOperand(MI->getOperand(1)) - .addImm(OPCODE_IS_NOT_ZERO) - .addImm(0); // Flags - TII->addFlag(NewMI, 1, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) - .addOperand(MI->getOperand(0)) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - break; - } - case AMDGPU::BRANCH_COND_i32: - { - MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) - .addReg(AMDGPU::PREDICATE_BIT) - .addOperand(MI->getOperand(1)) - .addImm(OPCODE_IS_NOT_ZERO_INT) - .addImm(0); // Flags - TII->addFlag(NewMI, 1, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) - .addOperand(MI->getOperand(0)) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - break; - } - case AMDGPU::input_perspective: - { - R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); - - // XXX Be more fine about register reservation - for (unsigned i = 0; i < 4; i ++) { - unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); - MFI->ReservedRegs.push_back(ReservedReg); - } - - switch (MI->getOperand(1).getImm()) { - case 0:// Perspective - MFI->HasPerspectiveInterpolation = true; - break; - case 1:// Linear - MFI->HasLinearInterpolation = true; - break; - default: - assert(0 && "Unknow ij index"); - } - - return BB; - } - } - - MI->eraseFromParent(); - return BB; -} - -//===----------------------------------------------------------------------===// -// Custom DAG Lowering Operations -//===----------------------------------------------------------------------===// - -using namespace llvm::Intrinsic; -using namespace llvm::AMDGPUIntrinsic; - -SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const -{ - switch (Op.getOpcode()) { - default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::ROTL: return LowerROTL(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::INTRINSIC_VOID: { - SDValue Chain = Op.getOperand(0); - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - switch (IntrinsicID) { - case AMDGPUIntrinsic::AMDGPU_store_output: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); - if (!MRI.isLiveOut(Reg)) { - MRI.addLiveOut(Reg); - } - return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); - } - // default for switch(IntrinsicID) - default: break; - } - // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) - break; - } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); - switch(IntrinsicID) { - default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case AMDGPUIntrinsic::R600_load_input: { - int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); - } - case AMDGPUIntrinsic::R600_load_input_perspective: { - unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP, - DL, MVT::v4f32, - DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_linear: { - unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP, - DL, MVT::v4f32, - DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_constant: { - unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP_P0, - DL, MVT::v4f32, - DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_position: { - unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); - SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - RegIndex, MVT::f32); - if ((slot % 4) == 3) { - return DAG.getNode(ISD::FDIV, - DL, VT, - DAG.getConstantFP(1.0f, MVT::f32), - Reg); - } else { - return Reg; - } - } - - case r600_read_ngroups_x: - return LowerImplicitParameter(DAG, VT, DL, 0); - case r600_read_ngroups_y: - return LowerImplicitParameter(DAG, VT, DL, 1); - case r600_read_ngroups_z: - return LowerImplicitParameter(DAG, VT, DL, 2); - case r600_read_global_size_x: - return LowerImplicitParameter(DAG, VT, DL, 3); - case r600_read_global_size_y: - return LowerImplicitParameter(DAG, VT, DL, 4); - case r600_read_global_size_z: - return LowerImplicitParameter(DAG, VT, DL, 5); - case r600_read_local_size_x: - return LowerImplicitParameter(DAG, VT, DL, 6); - case r600_read_local_size_y: - return LowerImplicitParameter(DAG, VT, DL, 7); - case r600_read_local_size_z: - return LowerImplicitParameter(DAG, VT, DL, 8); - - case r600_read_tgid_x: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_X, VT); - case r600_read_tgid_y: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Y, VT); - case r600_read_tgid_z: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Z, VT); - case r600_read_tidig_x: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_X, VT); - case r600_read_tidig_y: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Y, VT); - case r600_read_tidig_z: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Z, VT); - } - // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) - break; - } - } // end switch(Op.getOpcode()) - return SDValue(); -} - -void R600TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const -{ - switch (N->getOpcode()) { - default: return; - case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); - case ISD::INTRINSIC_WO_CHAIN: - { - unsigned IntrinsicID = - cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) { - Results.push_back(LowerInputFace(N, DAG)); - } else { - return; - } - } - } -} - -SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const -{ - unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); - unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); - SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - RegIndex, MVT::f32); - return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1, - Reg, DAG.getConstantFP(0.0f, MVT::f32), - DAG.getCondCode(ISD::SETUGT)); -} - -SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const -{ - return DAG.getNode( - ISD::SETCC, - Op.getDebugLoc(), - MVT::i1, - Op, DAG.getConstantFP(0.0f, MVT::f32), - DAG.getCondCode(ISD::SETNE) - ); -} - -SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Chain = Op.getOperand(0); - SDValue CC = Op.getOperand(1); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue JumpT = Op.getOperand(4); - SDValue CmpValue; - SDValue Result; - - if (LHS.getValueType() == MVT::i32) { - CmpValue = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::i32, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - } else if (LHS.getValueType() == MVT::f32) { - CmpValue = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::f32, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - } else { - assert(0 && "Not valid type for br_cc"); - } - Result = DAG.getNode( - AMDGPUISD::BRANCH_COND, - CmpValue.getDebugLoc(), - MVT::Other, Chain, - JumpT, CmpValue); - return Result; -} - -SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, - DebugLoc DL, - unsigned DwordOffset) const -{ - unsigned ByteOffset = DwordOffset * 4; - PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::PARAM_I_ADDRESS); - - // We shouldn't be using an offset wider than 16-bits for implicit parameters. - assert(isInt<16>(ByteOffset)); - - return DAG.getLoad(VT, DL, DAG.getEntryNode(), - DAG.getConstant(ByteOffset, MVT::i32), // PTR - MachinePointerInfo(ConstantPointerNull::get(PtrType)), - false, false, false, 0); -} - -SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - - return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, - Op.getOperand(0), - Op.getOperand(0), - DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(32, MVT::i32), - Op.getOperand(1))); -} - -bool R600TargetLowering::isZero(SDValue Op) const -{ - if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { - return Cst->isNullValue(); - } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ - return CstFP->isZero(); - } else { - return false; - } -} - -SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue True = Op.getOperand(2); - SDValue False = Op.getOperand(3); - SDValue CC = Op.getOperand(4); - SDValue Temp; - - // LHS and RHS are guaranteed to be the same value type - EVT CompareVT = LHS.getValueType(); - - // We need all the operands of SELECT_CC to have the same value type, so if - // necessary we need to convert LHS and RHS to be the same type True and - // False. True and False are guaranteed to have the same type as this - // SELECT_CC node. - - if (isHWTrueValue(True) && isHWFalseValue(False)) { - if (CompareVT != VT) { - if (VT == MVT::f32 && CompareVT == MVT::i32) { - SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean); - } else if (VT == MVT::i32 && CompareVT == MVT::f32) { - SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt); - } else { - // I don't think there will be any other type pairings. - assert(!"Unhandled operand type parings in SELECT_CC"); - } - } else { - return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); - } - } - - - // XXX If True is a hardware TRUE value and False is a hardware FALSE value, - // we can handle this with a native instruction, but we need to swap true - // and false and change the conditional. - if (isHWTrueValue(False) && isHWFalseValue(True)) { - } - - // Check if we can lower this to a native operation. - // CND* instructions requires all operands to have the same type, - // and RHS to be zero. - - if (isZero(LHS) || isZero(RHS)) { - SDValue Cond = (isZero(LHS) ? RHS : LHS); - SDValue Zero = (isZero(LHS) ? LHS : RHS); - ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); - if (CompareVT != VT) { - True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); - False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); - } - if (isZero(LHS)) { - CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); - } - - switch (CCOpcode) { - case ISD::SETONE: - case ISD::SETUNE: - case ISD::SETNE: - case ISD::SETULE: - case ISD::SETULT: - case ISD::SETOLE: - case ISD::SETOLT: - case ISD::SETLE: - case ISD::SETLT: - CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); - Temp = True; - True = False; - False = Temp; - break; - default: - break; - } - SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - Cond, Zero, - True, False, - DAG.getCondCode(CCOpcode)); - return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); - } - - - // If we make it this for it means we have no native instructions to handle - // this SELECT_CC, so we must lower it. - SDValue HWTrue, HWFalse; - - if (CompareVT == MVT::f32) { - HWTrue = DAG.getConstantFP(1.0f, CompareVT); - HWFalse = DAG.getConstantFP(0.0f, CompareVT); - } else if (CompareVT == MVT::i32) { - HWTrue = DAG.getConstant(-1, CompareVT); - HWFalse = DAG.getConstant(0, CompareVT); - } - else { - assert(!"Unhandled value type in LowerSELECT_CC"); - } - - // Lower this unsupported SELECT_CC into a combination of two supported - // SELECT_CC operations. - SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); - - return DAG.getNode(ISD::SELECT_CC, DL, VT, - Cond, HWFalse, - True, False, - DAG.getCondCode(ISD::SETNE)); -} - -SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Cond; - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue CC = Op.getOperand(2); - DebugLoc DL = Op.getDebugLoc(); - assert(Op.getValueType() == MVT::i32); - if (LHS.getValueType() == MVT::i32) { - Cond = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::i32, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - } else if (LHS.getValueType() == MVT::f32) { - Cond = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::f32, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - Cond = DAG.getNode( - ISD::FP_TO_SINT, - DL, - MVT::i32, - Cond); - } else { - assert(0 && "Not valid type for set_cc"); - } - Cond = DAG.getNode( - ISD::AND, - DL, - MVT::i32, - DAG.getConstant(1, MVT::i32), - Cond); - return Cond; -} - -// XXX Only kernel functions are supporte, so we can assume for now that -// every function is a kernel function, but in the future we should use -// separate calling conventions for kernel and non-kernel functions. -// Only kernel functions are supported, so we can assume for now -SDValue R600TargetLowering::LowerFormalArguments( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const -{ - unsigned ParamOffsetBytes = 36; - for (unsigned i = 0, e = Ins.size(); i < e; ++i) { - EVT VT = Ins[i].VT; - PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::PARAM_I_ADDRESS); - SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(), - DAG.getConstant(ParamOffsetBytes, MVT::i32), - MachinePointerInfo(new Argument(PtrTy)), - false, false, false, 4); - InVals.push_back(Arg); - ParamOffsetBytes += (VT.getStoreSize()); - } - return Chain; -} - -//===----------------------------------------------------------------------===// -// Custom DAG Optimizations -//===----------------------------------------------------------------------===// - -SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const -{ - SelectionDAG &DAG = DCI.DAG; - - switch (N->getOpcode()) { - // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) - case ISD::FP_ROUND: { - SDValue Arg = N->getOperand(0); - if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0), - Arg.getOperand(0)); - } - break; - } - } - return SDValue(); -} diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h deleted file mode 100644 index 7df2dd13787..00000000000 --- a/src/gallium/drivers/radeon/R600ISelLowering.h +++ /dev/null @@ -1,69 +0,0 @@ -//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// R600 DAG Lowering interface definition -// -//===----------------------------------------------------------------------===// - -#ifndef R600ISELLOWERING_H -#define R600ISELLOWERING_H - -#include "AMDGPUISelLowering.h" - -namespace llvm { - -class R600InstrInfo; - -class R600TargetLowering : public AMDGPUTargetLowering -{ -public: - R600TargetLowering(TargetMachine &TM); - virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock * BB) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - void ReplaceNodeResults(SDNode * N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; - virtual SDValue LowerFormalArguments( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; -private: - const R600InstrInfo * TII; - - /// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters - /// that are stored in the first nine dwords of a Vertex Buffer. These - /// implicit parameters are lowered to load instructions which retreive the - /// values from the Vertex Buffer. - SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT, - DebugLoc DL, unsigned DwordOffset) const; - - void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, - MachineRegisterInfo & MRI, unsigned dword_offset) const; - - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - - /// LowerROTL - Lower ROTL opcode to BITALIGN - SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const; - SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; - - bool isZero(SDValue Op) const; -}; - -} // End namespace llvm; - -#endif // R600ISELLOWERING_H diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp deleted file mode 100644 index e990dd9370b..00000000000 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ /dev/null @@ -1,512 +0,0 @@ -//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// R600 Implementation of TargetInstrInfo. -// -//===----------------------------------------------------------------------===// - -#include "R600InstrInfo.h" -#include "AMDGPUTargetMachine.h" -#include "AMDGPUSubtarget.h" -#include "R600Defines.h" -#include "R600RegisterInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "AMDILUtilityFunctions.h" - -#define GET_INSTRINFO_CTOR -#include "AMDGPUGenDFAPacketizer.inc" - -using namespace llvm; - -R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) - : AMDGPUInstrInfo(tm), - RI(tm, *this), - TM(tm) - { } - -const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const -{ - return RI; -} - -bool R600InstrInfo::isTrig(const MachineInstr &MI) const -{ - return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; -} - -bool R600InstrInfo::isVector(const MachineInstr &MI) const -{ - return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; -} - -void -R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const -{ - if (AMDGPU::R600_Reg128RegClass.contains(DestReg) - && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { - for (unsigned I = 0; I < 4; I++) { - unsigned SubRegIndex = RI.getSubRegFromChannel(I); - BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) - .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define) - .addReg(RI.getSubReg(SrcReg, SubRegIndex)) - .addImm(0) // Flag - .addReg(0) // PREDICATE_BIT - .addReg(DestReg, RegState::Define | RegState::Implicit); - } - } else { - - /* We can't copy vec4 registers */ - assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) - && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); - - BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) // Flag - .addReg(0); // PREDICATE_BIT - } -} - -MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, - unsigned DstReg, int64_t Imm) const -{ - MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); - MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); - MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); - MachineInstrBuilder(MI).addImm(Imm); - MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT - - return MI; -} - -unsigned R600InstrInfo::getIEQOpcode() const -{ - return AMDGPU::SETE_INT; -} - -bool R600InstrInfo::isMov(unsigned Opcode) const -{ - - - switch(Opcode) { - default: return false; - case AMDGPU::MOV: - case AMDGPU::MOV_IMM_F32: - case AMDGPU::MOV_IMM_I32: - return true; - } -} - -// Some instructions act as place holders to emulate operations that the GPU -// hardware does automatically. This function can be used to check if -// an opcode falls into this category. -bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const -{ - switch (Opcode) { - default: return false; - case AMDGPU::RETURN: - case AMDGPU::MASK_WRITE: - case AMDGPU::RESERVE_REG: - return true; - } -} - -bool R600InstrInfo::isReductionOp(unsigned Opcode) const -{ - switch(Opcode) { - default: return false; - case AMDGPU::DOT4_r600: - case AMDGPU::DOT4_eg: - return true; - } -} - -bool R600InstrInfo::isCubeOp(unsigned Opcode) const -{ - switch(Opcode) { - default: return false; - case AMDGPU::CUBE_r600_pseudo: - case AMDGPU::CUBE_r600_real: - case AMDGPU::CUBE_eg_pseudo: - case AMDGPU::CUBE_eg_real: - return true; - } -} - -DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, - const ScheduleDAG *DAG) const -{ - const InstrItineraryData *II = TM->getInstrItineraryData(); - return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); -} - -static bool -isPredicateSetter(unsigned Opcode) -{ - switch (Opcode) { - case AMDGPU::PRED_X: - return true; - default: - return false; - } -} - -static MachineInstr * -findFirstPredicateSetterFrom(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) -{ - while (I != MBB.begin()) { - --I; - MachineInstr *MI = I; - if (isPredicateSetter(MI->getOpcode())) - return MI; - } - - return NULL; -} - -bool -R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const -{ - // Most of the following comes from the ARM implementation of AnalyzeBranch - - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) { - return false; - } - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || - static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) { - if (LastOpc == AMDGPU::JUMP) { - if(!isPredicated(LastInst)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - MachineInstr *predSet = I; - while (!isPredicateSetter(predSet->getOpcode())) { - predSet = --I; - } - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(predSet->getOperand(1)); - Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); - return false; - } - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If the block ends with a B and a Bcc, handle it. - if (SecondLastOpc == AMDGPU::JUMP && - isPredicated(SecondLastInst) && - LastOpc == AMDGPU::JUMP && - !isPredicated(LastInst)) { - MachineInstr *predSet = --I; - while (!isPredicateSetter(predSet->getOpcode())) { - predSet = --I; - } - TBB = SecondLastInst->getOperand(0).getMBB(); - FBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(predSet->getOperand(1)); - Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); - return false; - } - - // Otherwise, can't handle this. - return true; -} - -int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { - const MachineInstr *MI = op.getParent(); - - switch (MI->getDesc().OpInfo->RegClass) { - default: // FIXME: fallthrough?? - case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; - case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; - }; -} - -unsigned -R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const -{ - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - - if (FBB == 0) { - if (Cond.empty()) { - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); - return 1; - } else { - MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); - assert(PredSet && "No previous predicate !"); - addFlag(PredSet, 1, MO_FLAG_PUSH); - PredSet->getOperand(2).setImm(Cond[1].getImm()); - - BuildMI(&MBB, DL, get(AMDGPU::JUMP)) - .addMBB(TBB) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - return 1; - } - } else { - MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); - assert(PredSet && "No previous predicate !"); - addFlag(PredSet, 1, MO_FLAG_PUSH); - PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)) - .addMBB(TBB) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); - return 2; - } -} - -unsigned -R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const -{ - - // Note : we leave PRED* instructions there. - // They may be needed when predicating instructions. - - MachineBasicBlock::iterator I = MBB.end(); - - if (I == MBB.begin()) { - return 0; - } - --I; - switch (I->getOpcode()) { - default: - return 0; - case AMDGPU::JUMP: - if (isPredicated(I)) { - MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 1, MO_FLAG_PUSH); - } - I->eraseFromParent(); - break; - } - I = MBB.end(); - - if (I == MBB.begin()) { - return 1; - } - --I; - switch (I->getOpcode()) { - // FIXME: only one case?? - default: - return 1; - case AMDGPU::JUMP: - if (isPredicated(I)) { - MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 1, MO_FLAG_PUSH); - } - I->eraseFromParent(); - break; - } - return 2; -} - -bool -R600InstrInfo::isPredicated(const MachineInstr *MI) const -{ - int idx = MI->findFirstPredOperandIdx(); - if (idx < 0) - return false; - - unsigned Reg = MI->getOperand(idx).getReg(); - switch (Reg) { - default: return false; - case AMDGPU::PRED_SEL_ONE: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PREDICATE_BIT: - return true; - } -} - -bool -R600InstrInfo::isPredicable(MachineInstr *MI) const -{ - return AMDGPUInstrInfo::isPredicable(MI); -} - - -bool -R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const{ - return true; -} - -bool -R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, - unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, - unsigned ExtraFCycles, - const BranchProbability &Probability) const -{ - return true; -} - -bool -R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, - const BranchProbability &Probability) - const -{ - return true; -} - -bool -R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const -{ - return false; -} - - -bool -R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const -{ - MachineOperand &MO = Cond[1]; - switch (MO.getImm()) { - case OPCODE_IS_ZERO_INT: - MO.setImm(OPCODE_IS_NOT_ZERO_INT); - break; - case OPCODE_IS_NOT_ZERO_INT: - MO.setImm(OPCODE_IS_ZERO_INT); - break; - case OPCODE_IS_ZERO: - MO.setImm(OPCODE_IS_NOT_ZERO); - break; - case OPCODE_IS_NOT_ZERO: - MO.setImm(OPCODE_IS_ZERO); - break; - default: - return true; - } - - MachineOperand &MO2 = Cond[2]; - switch (MO2.getReg()) { - case AMDGPU::PRED_SEL_ZERO: - MO2.setReg(AMDGPU::PRED_SEL_ONE); - break; - case AMDGPU::PRED_SEL_ONE: - MO2.setReg(AMDGPU::PRED_SEL_ZERO); - break; - default: - return true; - } - return false; -} - -bool -R600InstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const -{ - return isPredicateSetter(MI->getOpcode()); -} - - -bool -R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const -{ - return false; -} - - -bool -R600InstrInfo::PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const -{ - int PIdx = MI->findFirstPredOperandIdx(); - - if (PIdx != -1) { - MachineOperand &PMO = MI->getOperand(PIdx); - PMO.setReg(Pred[2].getReg()); - MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); - return true; - } - - return false; -} - -int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost) const -{ - if (PredCost) - *PredCost = 2; - return 2; -} - -//===----------------------------------------------------------------------===// -// Instruction flag getters/setters -//===----------------------------------------------------------------------===// - -bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const -{ - return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; -} - -MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const -{ - unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags); - assert(FlagIndex != 0 && - "Instruction flags not supported for this instruction"); - MachineOperand &FlagOp = MI->getOperand(FlagIndex); - assert(FlagOp.isImm()); - return FlagOp; -} - -void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, - unsigned Flag) const -{ - MachineOperand &FlagOp = getFlagOp(MI); - FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); -} - -void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, - unsigned Flag) const -{ - MachineOperand &FlagOp = getFlagOp(MI); - unsigned InstFlags = FlagOp.getImm(); - InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); - FlagOp.setImm(InstFlags); -} diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h deleted file mode 100644 index de82542fa2c..00000000000 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ /dev/null @@ -1,132 +0,0 @@ -//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Interface definition for R600InstrInfo -// -//===----------------------------------------------------------------------===// - -#ifndef R600INSTRUCTIONINFO_H_ -#define R600INSTRUCTIONINFO_H_ - -#include "AMDIL.h" -#include "AMDGPUInstrInfo.h" -#include "R600RegisterInfo.h" - -#include <map> - -namespace llvm { - - class AMDGPUTargetMachine; - class DFAPacketizer; - class ScheduleDAG; - class MachineFunction; - class MachineInstr; - class MachineInstrBuilder; - - class R600InstrInfo : public AMDGPUInstrInfo { - private: - const R600RegisterInfo RI; - AMDGPUTargetMachine &TM; - - int getBranchInstr(const MachineOperand &op) const; - - public: - explicit R600InstrInfo(AMDGPUTargetMachine &tm); - - const R600RegisterInfo &getRegisterInfo() const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - bool isTrig(const MachineInstr &MI) const; - bool isPlaceHolderOpcode(unsigned opcode) const; - bool isReductionOp(unsigned opcode) const; - bool isCubeOp(unsigned opcode) const; - - /// isVector - Vector instructions are instructions that must fill all - /// instruction slots within an instruction group. - bool isVector(const MachineInstr &MI) const; - - virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, - int64_t Imm) const; - - virtual unsigned getIEQOpcode() const; - virtual bool isMov(unsigned Opcode) const; - - DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, - const ScheduleDAG *DAG) const; - - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; - - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; - - unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - bool isPredicated(const MachineInstr *MI) const; - - bool isPredicable(MachineInstr *MI) const; - - bool - isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - const BranchProbability &Probability) const; - - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const ; - - bool - isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const; - - bool DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const; - - bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const; - - bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const; - - bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const; - - int getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost = 0) const; - - virtual int getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const { return 1;} - - ///hasFlagOperand - Returns true if this instruction has an operand for - /// storing target flags. - bool hasFlagOperand(const MachineInstr &MI) const; - - ///addFlag - Add one of the MO_FLAG* flags to the specified Operand. - void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; - - ///isFlagSet - Determine if the specified flag is set on this Operand. - bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const; - - ///getFlagOp - Return the operand containing the flags for this instruction. - MachineOperand &getFlagOp(MachineInstr *MI) const; - - ///clearFlag - Clear the specified flag on the instruction. - void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; -}; - -} // End llvm namespace - -#endif // R600INSTRINFO_H_ diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td deleted file mode 100644 index 120a71c5b9e..00000000000 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ /dev/null @@ -1,1458 +0,0 @@ -//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// R600 Tablegen instruction definitions -// -//===----------------------------------------------------------------------===// - -include "R600Intrinsics.td" - -class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, - InstrItinClass itin> - : AMDGPUInst <outs, ins, asm, pattern> { - - field bits<64> Inst; - bit Trig = 0; - bit Op3 = 0; - bit isVector = 0; - bits<2> FlagOperandIdx = 0; - - bits<11> op_code = inst; - //let Inst = inst; - let Namespace = "AMDGPU"; - let OutOperandList = outs; - let InOperandList = ins; - let AsmString = asm; - let Pattern = pattern; - let Itinerary = itin; - - let TSFlags{4} = Trig; - let TSFlags{5} = Op3; - - // Vector instructions are instructions that must fill all slots in an - // instruction group - let TSFlags{6} = isVector; - let TSFlags{8-7} = FlagOperandIdx; -} - -class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : - AMDGPUInst <outs, ins, asm, pattern> -{ - field bits<64> Inst; - - let Namespace = "AMDGPU"; -} - -def MEMxi : Operand<iPTR> { - let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index); -} - -def MEMrr : Operand<iPTR> { - let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index); -} - -def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; -def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; -def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; - -class R600_ALU { - - bits<7> DST_GPR = 0; - bits<9> SRC0_SEL = 0; - bits<1> SRC0_NEG = 0; - bits<9> SRC1_SEL = 0; - bits<1> SRC1_NEG = 0; - bits<1> CLAMP = 0; - -} - -def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), - (ops PRED_SEL_OFF)>; - - -class R600_1OP <bits<11> inst, string opName, list<dag> pattern, - InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src, R600_Pred:$p, variable_ops), - !strconcat(opName, " $dst, $src ($p)"), - pattern, - itin>{ - bits<7> dst; - bits<9> src; - let Inst{8-0} = src; - let Inst{49-39} = inst; - let Inst{59-53} = dst; - } - -class R600_2OP <bits<11> inst, string opName, list<dag> pattern, - InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops), - !strconcat(opName, " $dst, $src0, $src1"), - pattern, - itin>{ - bits<7> dst; - bits<9> src0; - bits<9> src1; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let Inst{49-39} = inst; - let Inst{59-53} = dst; - } - -class R600_3OP <bits<11> inst, string opName, list<dag> pattern, - InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops), - !strconcat(opName, " $dst, $src0, $src1, $src2"), - pattern, - itin>{ - bits<7> dst; - bits<9> src0; - bits<9> src1; - bits<9> src2; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let Inst{40-32} = src2; - let Inst{49-45} = inst{4-0}; - let Inst{59-53} = dst; - let Op3 = 1; - } - - - -def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst), - (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), - "PRED $dst, $src0, $src1", - [], NullALU> -{ - bits<7> dst; - bits<9> src0; - bits<11> src1; - let Inst{8-0} = src0; - let Inst{49-39} = src1; - let Inst{59-53} = dst; - let FlagOperandIdx = 3; -} - -let isTerminator = 1, isBranch = 1, isPseudo = 1 in { -def JUMP : InstR600 <0x10, - (outs), - (ins brtarget:$target, R600_Pred:$p), - "JUMP $target ($p)", - [], AnyALU - >; -} - -class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, - InstrItinClass itin = VecALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), - ins, - asm, - pattern, - itin>{ - bits<7> dst; - let Inst{49-39} = inst; - let Inst{59-53} = dst; - } - -class R600_TEX <bits<11> inst, string opName, list<dag> pattern, - InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2), - !strconcat(opName, "$dst, $src0, $src1, $src2"), - pattern, - itin>{ - let Inst {10-0} = inst; - } - -def TEX_SHADOW : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12; - }] ->; - -class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, - dag ins, string asm, list<dag> pattern> : - InstR600ISA <outs, ins, asm, pattern> -{ - bits<7> RW_GPR; - bits<7> INDEX_GPR; - - bits<2> RIM; - bits<2> TYPE; - bits<1> RW_REL; - bits<2> ELEM_SIZE; - - bits<12> ARRAY_SIZE; - bits<4> COMP_MASK; - bits<4> BURST_COUNT; - bits<1> VPM; - bits<1> eop; - bits<1> MARK; - bits<1> BARRIER; - - // CF_ALLOC_EXPORT_WORD0_RAT - let Inst{3-0} = rat_id; - let Inst{9-4} = rat_inst; - let Inst{10} = 0; // Reserved - let Inst{12-11} = RIM; - let Inst{14-13} = TYPE; - let Inst{21-15} = RW_GPR; - let Inst{22} = RW_REL; - let Inst{29-23} = INDEX_GPR; - let Inst{31-30} = ELEM_SIZE; - - // CF_ALLOC_EXPORT_WORD1_BUF - let Inst{43-32} = ARRAY_SIZE; - let Inst{47-44} = COMP_MASK; - let Inst{51-48} = BURST_COUNT; - let Inst{52} = VPM; - let Inst{53} = eop; - let Inst{61-54} = cf_inst; - let Inst{62} = MARK; - let Inst{63} = BARRIER; -} - -def load_param : PatFrag<(ops node:$ptr), - (load node:$ptr), - [{ - const Value *Src = cast<LoadSDNode>(N)->getSrcValue(); - if (Src) { - PointerType * PT = dyn_cast<PointerType>(Src->getType()); - return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS; - } - return false; - }]>; - -def isR600 : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">; -def isR700 : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" - "Subtarget.device()->getDeviceFlag()" - ">= OCL_DEVICE_RV710">; -def isEG : Predicate< - "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && " - "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && " - "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; - -def isCayman : Predicate<"Subtarget.device()" - "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; -def isEGorCayman : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD5XXX" - "|| Subtarget.device()->getGeneration() ==" - "AMDGPUDeviceInfo::HD6XXX">; - -def isR600toCayman : Predicate< - "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; - -//===----------------------------------------------------------------------===// -// Interpolation Instructions -//===----------------------------------------------------------------------===// - -def INTERP: SDNode<"AMDGPUISD::INTERP", - SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> - >; - -def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", - SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> - >; - -let usesCustomInserter = 1 in { -def input_perspective : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src0, i32imm:$src1), - "input_perspective $src0 $src1 : dst", - [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; -} // End usesCustomInserter = 1 - -def input_constant : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src), - "input_perspective $src : dst", - [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; - - - -def INTERP_XY : InstR600 <0xD6, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), - "INTERP_XY dst", - [], AnyALU> -{ - let FlagOperandIdx = 3; -} - -def INTERP_ZW : InstR600 <0xD7, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), - "INTERP_ZW dst", - [], AnyALU> -{ - let FlagOperandIdx = 3; -} - -def INTERP_LOAD_P0 : InstR600 <0xE0, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src, i32imm:$flags), - "INTERP_LOAD_P0 dst", - [], AnyALU> -{ - let FlagOperandIdx = 2; -} - -let Predicates = [isR600toCayman] in { - -//===----------------------------------------------------------------------===// -// Common Instructions R600, R700, Evergreen, Cayman -//===----------------------------------------------------------------------===// - -def ADD : R600_2OP < - 0x0, "ADD", - [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -// Non-IEEE MUL: 0 * anything = 0 -def MUL : R600_2OP < - 0x1, "MUL NON-IEEE", - [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def MUL_IEEE : R600_2OP < - 0x2, "MUL_IEEE", - [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def MAX : R600_2OP < - 0x3, "MAX", - [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def MIN : R600_2OP < - 0x4, "MIN", - [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, -// so some of the instruction names don't match the asm string. -// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. - -def SETE : R600_2OP < - 0x08, "SETE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_EQ))] ->; - -def SGT : R600_2OP < - 0x09, "SETGT", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_GT))] ->; - -def SGE : R600_2OP < - 0xA, "SETGE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_GE))] ->; - -def SNE : R600_2OP < - 0xB, "SETNE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_NE))] ->; - -def FRACT : R600_1OP < - 0x10, "FRACT", - [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))] ->; - -def TRUNC : R600_1OP < - 0x11, "TRUNC", - [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] ->; - -def CEIL : R600_1OP < - 0x12, "CEIL", - [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))] ->; - -def RNDNE : R600_1OP < - 0x13, "RNDNE", - [(set R600_Reg32:$dst, (frint R600_Reg32:$src))] ->; - -def FLOOR : R600_1OP < - 0x14, "FLOOR", - [(set R600_Reg32:$dst, (ffloor R600_Reg32:$src))] ->; - -def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, i32imm:$flags, - R600_Pred:$p), - "MOV $dst, $src0", [], AnyALU> { - let FlagOperandIdx = 2; - bits<7> dst; - bits<9> src0; - let Inst{8-0} = src0; - let Inst{49-39} = op_code; - let Inst{59-53} = dst; -} - -class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm), - "MOV_IMM $dst, $imm", - [], AnyALU ->{ - bits<7> dst; - bits<9> alu_literal; - bits<9> p; - let Inst{8-0} = alu_literal; - let Inst{21-13} = p; - let Inst{49-39} = op_code; - let Inst{59-53} = dst; -} - -def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; -def : Pat < - (imm:$val), - (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val) ->; - -def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; -def : Pat < - (fpimm:$val), - (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val) ->; - -def KILLGT : InstR600 <0x2D, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p, - variable_ops), - "KILLGT $dst, $src0, $src1, $flags ($p)", - [], - NullALU>{ - let FlagOperandIdx = 3; - bits<7> dst; - bits<9> src0; - bits<9> src1; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let Inst{49-39} = op_code; - let Inst{59-53} = dst; -} - -def AND_INT : R600_2OP < - 0x30, "AND_INT", - [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def OR_INT : R600_2OP < - 0x31, "OR_INT", - [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def XOR_INT : R600_2OP < - 0x32, "XOR_INT", - [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def NOT_INT : R600_1OP < - 0x33, "NOT_INT", - [(set R600_Reg32:$dst, (not R600_Reg32:$src))] ->; - -def ADD_INT : R600_2OP < - 0x34, "ADD_INT", - [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def SUB_INT : R600_2OP < - 0x35, "SUB_INT", - [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def MAX_INT : R600_2OP < - 0x36, "MAX_INT", - [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>; - -def MIN_INT : R600_2OP < - 0x37, "MIN_INT", - [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>; - -def MAX_UINT : R600_2OP < - 0x38, "MAX_UINT", - [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def MIN_UINT : R600_2OP < - 0x39, "MIN_UINT", - [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -def SETE_INT : R600_2OP < - 0x3A, "SETE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] ->; - -def SETGT_INT : R600_2OP < - 0x3B, "SGT_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] ->; - -def SETGE_INT : R600_2OP < - 0x3C, "SETGE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] ->; - -def SETNE_INT : R600_2OP < - 0x3D, "SETNE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] ->; - -def SETGT_UINT : R600_2OP < - 0x3E, "SETGT_UINT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] ->; - -def SETGE_UINT : R600_2OP < - 0x3F, "SETGE_UINT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] ->; - -def CNDE_INT : R600_3OP < - 0x1C, "CNDE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_EQ))] ->; - -def CNDGE_INT : R600_3OP < - 0x1E, "CNDGE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_GE))] ->; - -def CNDGT_INT : R600_3OP < - 0x1D, "CNDGT_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_GT))] ->; - -//===----------------------------------------------------------------------===// -// Texture instructions -//===----------------------------------------------------------------------===// - -def TEX_LD : R600_TEX < - 0x03, "TEX_LD", - [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))] -> { -let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5"; -let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5); -} - -def TEX_GET_TEXTURE_RESINFO : R600_TEX < - 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))] ->; - -def TEX_GET_GRADIENTS_H : R600_TEX < - 0x07, "TEX_GET_GRADIENTS_H", - [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))] ->; - -def TEX_GET_GRADIENTS_V : R600_TEX < - 0x08, "TEX_GET_GRADIENTS_V", - [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))] ->; - -def TEX_SET_GRADIENTS_H : R600_TEX < - 0x0B, "TEX_SET_GRADIENTS_H", - [] ->; - -def TEX_SET_GRADIENTS_V : R600_TEX < - 0x0C, "TEX_SET_GRADIENTS_V", - [] ->; - -def TEX_SAMPLE : R600_TEX < - 0x10, "TEX_SAMPLE", - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))] ->; - -def TEX_SAMPLE_C : R600_TEX < - 0x18, "TEX_SAMPLE_C", - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] ->; - -def TEX_SAMPLE_L : R600_TEX < - 0x11, "TEX_SAMPLE_L", - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))] ->; - -def TEX_SAMPLE_C_L : R600_TEX < - 0x19, "TEX_SAMPLE_C_L", - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] ->; - -def TEX_SAMPLE_LB : R600_TEX < - 0x12, "TEX_SAMPLE_LB", - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))] ->; - -def TEX_SAMPLE_C_LB : R600_TEX < - 0x1A, "TEX_SAMPLE_C_LB", - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] ->; - -def TEX_SAMPLE_G : R600_TEX < - 0x14, "TEX_SAMPLE_G", - [] ->; - -def TEX_SAMPLE_C_G : R600_TEX < - 0x1C, "TEX_SAMPLE_C_G", - [] ->; - -//===----------------------------------------------------------------------===// -// Helper classes for common instructions -//===----------------------------------------------------------------------===// - -class MUL_LIT_Common <bits<11> inst> : R600_3OP < - inst, "MUL_LIT", - [] ->; - -class MULADD_Common <bits<11> inst> : R600_3OP < - inst, "MULADD", - [(set (f32 R600_Reg32:$dst), - (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] ->; - -class CNDE_Common <bits<11> inst> : R600_3OP < - inst, "CNDE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_EQ))] ->; - -class CNDGT_Common <bits<11> inst> : R600_3OP < - inst, "CNDGT", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_GT))] ->; - -class CNDGE_Common <bits<11> inst> : R600_3OP < - inst, "CNDGE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_GE))] ->; - -class DOT4_Common <bits<11> inst> : R600_REDUCTION < - inst, - (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), - "DOT4 $dst $src0, $src1", - [] - > { - bits<9> src0; - bits<9> src1; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let FlagOperandIdx = 3; -} - -class DOT4_Pat <Instruction dot4> : Pat < - (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1), - (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) ->; - -multiclass CUBE_Common <bits<11> inst> { - - def _pseudo : InstR600 < - inst, - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src), - "CUBE $dst $src", - [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], - VecALU - >; - - def _real : InstR600 < - inst, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), - "CUBE $dst, $src0, $src1", - [], VecALU - >{ - let FlagOperandIdx = 3; - bits<7> dst; - bits<9> src0; - bits<9> src1; - let Inst{8-0} = src0; - let Inst{21-13} = src1; - let Inst{49-39} = inst; - let Inst{59-53} = dst; - } -} - -class EXP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "EXP_IEEE", - [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] ->; - -class FLT_TO_INT_Common <bits<11> inst> : R600_1OP < - inst, "FLT_TO_INT", - [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] ->; - -class INT_TO_FLT_Common <bits<11> inst> : R600_1OP < - inst, "INT_TO_FLT", - [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] ->; - -class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP < - inst, "FLT_TO_UINT", - [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] ->; - -class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP < - inst, "UINT_TO_FLT", - [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] ->; - -class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < - inst, "LOG_CLAMPED", - [] ->; - -class LOG_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "LOG_IEEE", - [(set R600_Reg32:$dst, (flog2 R600_Reg32:$src))] ->; - -class LSHL_Common <bits<11> inst> : R600_2OP < - inst, "LSHL $dst, $src0, $src1", - [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -class LSHR_Common <bits<11> inst> : R600_2OP < - inst, "LSHR $dst, $src0, $src1", - [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -class ASHR_Common <bits<11> inst> : R600_2OP < - inst, "ASHR $dst, $src0, $src1", - [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -class MULHI_INT_Common <bits<11> inst> : R600_2OP < - inst, "MULHI_INT $dst, $src0, $src1", - [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -class MULHI_UINT_Common <bits<11> inst> : R600_2OP < - inst, "MULHI $dst, $src0, $src1", - [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -class MULLO_INT_Common <bits<11> inst> : R600_2OP < - inst, "MULLO_INT $dst, $src0, $src1", - [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))] ->; - -class MULLO_UINT_Common <bits<11> inst> : R600_2OP < - inst, "MULLO_UINT $dst, $src0, $src1", - [] ->; - -class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_CLAMPED", - [] ->; - -class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_IEEE", - [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] ->; - -class RECIP_UINT_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_INT $dst, $src", - [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] ->; - -class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP < - inst, "RECIPSQRT_CLAMPED", - [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] ->; - -class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIPSQRT_IEEE", - [] ->; - -class SIN_Common <bits<11> inst> : R600_1OP < - inst, "SIN", []>{ - let Trig = 1; -} - -class COS_Common <bits<11> inst> : R600_1OP < - inst, "COS", []> { - let Trig = 1; -} - -//===----------------------------------------------------------------------===// -// Helper patterns for complex intrinsics -//===----------------------------------------------------------------------===// - -multiclass DIV_Common <InstR600 recip_ieee> { -def : Pat< - (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), - (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) ->; - -def : Pat< - (fdiv R600_Reg32:$src0, R600_Reg32:$src1), - (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) ->; -} - -class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat < - (int_AMDGPU_ssg R600_Reg32:$src), - (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE))) ->; - -class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < - (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), - (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) ->; - -//===----------------------------------------------------------------------===// -// R600 / R700 Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isR600] in { - - def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; - def MULADD_r600 : MULADD_Common<0x10>; - def CNDE_r600 : CNDE_Common<0x18>; - def CNDGT_r600 : CNDGT_Common<0x19>; - def CNDGE_r600 : CNDGE_Common<0x1A>; - def DOT4_r600 : DOT4_Common<0x50>; - def : DOT4_Pat <DOT4_r600>; - defm CUBE_r600 : CUBE_Common<0x52>; - def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; - def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; - def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; - def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; - def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; - def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; - def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>; - def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; - def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; - def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; - def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; - def SIN_r600 : SIN_Common<0x6E>; - def COS_r600 : COS_Common<0x6F>; - def ASHR_r600 : ASHR_Common<0x70>; - def LSHR_r600 : LSHR_Common<0x71>; - def LSHL_r600 : LSHL_Common<0x72>; - def MULLO_INT_r600 : MULLO_INT_Common<0x73>; - def MULHI_INT_r600 : MULHI_INT_Common<0x74>; - def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; - def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; - def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; - - defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; - def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>; - def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>; - def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; - -} - -// Helper pattern for normalizing inputs to triginomic instructions for R700+ -// cards. -class COS_PAT <InstR600 trig> : Pat< - (fcos R600_Reg32:$src), - (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) ->; - -class SIN_PAT <InstR600 trig> : Pat< - (fsin R600_Reg32:$src), - (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) ->; - -//===----------------------------------------------------------------------===// -// R700 Only instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isR700] in { - def SIN_r700 : SIN_Common<0x6E>; - def COS_r700 : COS_Common<0x6F>; - - // R700 normalizes inputs to SIN/COS the same as EG - def : SIN_PAT <SIN_r700>; - def : COS_PAT <COS_r700>; -} - -//===----------------------------------------------------------------------===// -// Evergreen Only instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isEG] in { - -def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; - -def MULLO_INT_eg : MULLO_INT_Common<0x8F>; -def MULHI_INT_eg : MULHI_INT_Common<0x90>; -def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; -def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; -def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; - -} // End Predicates = [isEG] - -//===----------------------------------------------------------------------===// -// Evergreen / Cayman Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isEGorCayman] in { - - // BFE_UINT - bit_extract, an optimization for mask and shift - // Src0 = Input - // Src1 = Offset - // Src2 = Width - // - // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) - // - // Example Usage: - // (Offset, Width) - // - // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 - // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 - // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 - // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 - def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", - [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, - R600_Reg32:$src1, - R600_Reg32:$src2))], - VecALU - >; - - def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", - [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, - R600_Reg32:$src2))], - VecALU - >; - - def MULADD_eg : MULADD_Common<0x14>; - def ASHR_eg : ASHR_Common<0x15>; - def LSHR_eg : LSHR_Common<0x16>; - def LSHL_eg : LSHL_Common<0x17>; - def CNDE_eg : CNDE_Common<0x19>; - def CNDGT_eg : CNDGT_Common<0x1A>; - def CNDGE_eg : CNDGE_Common<0x1B>; - def MUL_LIT_eg : MUL_LIT_Common<0x1F>; - def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; - def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; - def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; - def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; - def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; - def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; - def SIN_eg : SIN_Common<0x8D>; - def COS_eg : COS_Common<0x8E>; - def DOT4_eg : DOT4_Common<0xBE>; - def : DOT4_Pat <DOT4_eg>; - defm CUBE_eg : CUBE_Common<0xC0>; - - defm DIV_eg : DIV_Common<RECIP_IEEE_eg>; - def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>; - def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>; - def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; - - def : SIN_PAT <SIN_eg>; - def : COS_PAT <COS_eg>; - - def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { - let Pattern = []; - } - - def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; - - def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { - let Pattern = []; - } - - def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; - - def : Pat<(fp_to_sint R600_Reg32:$src), - (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; - - def : Pat<(fp_to_uint R600_Reg32:$src), - (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; - - def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; - -//===----------------------------------------------------------------------===// -// Memory read/write instructions -//===----------------------------------------------------------------------===// - -let usesCustomInserter = 1 in { - -class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT < - 0x57, 0x2, 0, (outs), ins, !strconcat(name, " $rw_gpr, $index_gpr, $eop"), []> -{ - let RIM = 0; - // XXX: Have a separate instruction for non-indexed writes. - let TYPE = 1; - let RW_REL = 0; - let ELEM_SIZE = 0; - - let ARRAY_SIZE = 0; - let COMP_MASK = comp_mask; - let BURST_COUNT = 0; - let VPM = 0; - let MARK = 0; - let BARRIER = 1; -} - -} // End usesCustomInserter = 1 - -// 32-bit store -def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop), - 0x1, "RAT_WRITE_CACHELESS_32_eg" ->; - -// i32 global_store -def : Pat < - (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), - (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) ->; - -// Floating point global_store -def : Pat < - (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), - (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) ->; - -//128-bit store -def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < - (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop), - 0xf, "RAT_WRITE_CACHELESS_128" ->; - -// v4f32 global store -def : Pat < - (global_store (v4f32 R600_Reg128:$val), R600_TReg32_X:$ptr), - (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0) ->; - -class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern> - : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> { - - // Operands - bits<7> DST_GPR; - bits<7> SRC_GPR; - - // Static fields - bits<5> VC_INST = 0; - bits<2> FETCH_TYPE = 2; - bits<1> FETCH_WHOLE_QUAD = 0; - bits<8> BUFFER_ID = buffer_id; - bits<1> SRC_REL = 0; - // XXX: We can infer this field based on the SRC_GPR. This would allow us - // to store vertex addresses in any channel, not just X. - bits<2> SRC_SEL_X = 0; - bits<6> MEGA_FETCH_COUNT; - bits<1> DST_REL = 0; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; - // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, - // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, - // however, based on my testing if USE_CONST_FIELDS is set, then all - // these fields need to be set to 0. - bits<1> USE_CONST_FIELDS = 0; - bits<6> DATA_FORMAT; - bits<2> NUM_FORMAT_ALL = 1; - bits<1> FORMAT_COMP_ALL = 0; - bits<1> SRF_MODE_ALL = 0; - - // LLVM can only encode 64-bit instructions, so these fields are manually - // encoded in R600CodeEmitter - // - // bits<16> OFFSET; - // bits<2> ENDIAN_SWAP = 0; - // bits<1> CONST_BUF_NO_STRIDE = 0; - // bits<1> MEGA_FETCH = 0; - // bits<1> ALT_CONST = 0; - // bits<2> BUFFER_INDEX_MODE = 0; - - // VTX_WORD0 - let Inst{4-0} = VC_INST; - let Inst{6-5} = FETCH_TYPE; - let Inst{7} = FETCH_WHOLE_QUAD; - let Inst{15-8} = BUFFER_ID; - let Inst{22-16} = SRC_GPR; - let Inst{23} = SRC_REL; - let Inst{25-24} = SRC_SEL_X; - let Inst{31-26} = MEGA_FETCH_COUNT; - - // VTX_WORD1_GPR - let Inst{38-32} = DST_GPR; - let Inst{39} = DST_REL; - let Inst{40} = 0; // Reserved - let Inst{43-41} = DST_SEL_X; - let Inst{46-44} = DST_SEL_Y; - let Inst{49-47} = DST_SEL_Z; - let Inst{52-50} = DST_SEL_W; - let Inst{53} = USE_CONST_FIELDS; - let Inst{59-54} = DATA_FORMAT; - let Inst{61-60} = NUM_FORMAT_ALL; - let Inst{62} = FORMAT_COMP_ALL; - let Inst{63} = SRF_MODE_ALL; - - // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding - // is done in R600CodeEmitter - // - // Inst{79-64} = OFFSET; - // Inst{81-80} = ENDIAN_SWAP; - // Inst{82} = CONST_BUF_NO_STRIDE; - // Inst{83} = MEGA_FETCH; - // Inst{84} = ALT_CONST; - // Inst{86-85} = BUFFER_INDEX_MODE; - // Inst{95-86} = 0; Reserved - - // VTX_WORD3 (Padding) - // - // Inst{127-96} = 0; -} - -class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { - - let MEGA_FETCH_COUNT = 1; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 1; // FMT_8 -} - -class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { - - let MEGA_FETCH_COUNT = 4; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 0xD; // COLOR_32 - - // This is not really necessary, but there were some GPU hangs that appeared - // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. - // e.g. - // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24 - // %T2_X<def> = MOV %ZERO - //Adding this constraint prevents this from happening. - let Constraints = "$ptr.ptr = $dst"; -} - -class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> { - - let MEGA_FETCH_COUNT = 16; - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 2; - let DST_SEL_W = 3; - let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 - - // XXX: Need to force VTX_READ_128 instructions to write to the same register - // that holds its buffer address to avoid potential hangs. We can't use - // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst - // registers are different sizes. -} - -//===----------------------------------------------------------------------===// -// VTX Read from parameter memory space -//===----------------------------------------------------------------------===// - -class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0, - [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>; -def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>; - - -//===----------------------------------------------------------------------===// -// VTX Read from global memory space -//===----------------------------------------------------------------------===// - -// 8-bit reads -def VTX_READ_GLOBAL_i8_eg : VTX_READ_8_eg <1, - [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] ->; - -// 32-bit reads - -class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1, - [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>; -def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>; - -// 128-bit reads - -class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1, - [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>; -def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>; - -//===----------------------------------------------------------------------===// -// Constant Loads -// XXX: We are currently storing all constants in the global address space. -//===----------------------------------------------------------------------===// - -def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set (f32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] ->; - -} - -let Predicates = [isCayman] in { - -let isVector = 1 in { - -def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; - -def MULLO_INT_cm : MULLO_INT_Common<0x8F>; -def MULHI_INT_cm : MULHI_INT_Common<0x90>; -def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; -def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; - -} // End isVector = 1 - -// RECIP_UINT emulation for Cayman -def : Pat < - (AMDGPUurecip R600_Reg32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), - (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000))) ->; - -} // End isCayman - -let isCodeGenOnly = 1 in { - - def MULLIT : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), - "MULLIT $dst, $src0, $src1", - [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] - >; - -let usesCustomInserter = 1, isPseudo = 1 in { - -class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst < - (outs R600_TReg32:$dst), - (ins), - asm, - [(set R600_TReg32:$dst, (intr))] ->; - -def R600_LOAD_CONST : AMDGPUShaderInst < - (outs R600_Reg32:$dst), - (ins i32imm:$src0), - "R600_LOAD_CONST $dst, $src0", - [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] ->; - -def RESERVE_REG : AMDGPUShaderInst < - (outs), - (ins i32imm:$src), - "RESERVE_REG $src", - [(int_AMDGPU_reserve_reg imm:$src)] ->; - -def TXD: AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), - "TXD $dst, $src0, $src1, $src2, $src3, $src4", - [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))] ->; - -def TXD_SHADOW: AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), - "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4", - [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))] ->; - -} // End usesCustomInserter = 1, isPseudo = 1 - -} // End isCodeGenOnly = 1 - -def CLAMP_R600 : CLAMP <R600_Reg32>; -def FABS_R600 : FABS<R600_Reg32>; -def FNEG_R600 : FNEG<R600_Reg32>; - -let usesCustomInserter = 1 in { - -def MASK_WRITE : AMDGPUShaderInst < - (outs), - (ins R600_Reg32:$src), - "MASK_WRITE $src", - [] ->; - -} // End usesCustomInserter = 1 - -//===---------------------------------------------------------------------===// -// Return instruction -//===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { - def RETURN : ILFormat<(outs), (ins variable_ops), - "RETURN", [(IL_retflag)]>; -} - -//===----------------------------------------------------------------------===// -// ISel Patterns -//===----------------------------------------------------------------------===// - -// KIL Patterns -def KILP : Pat < - (int_AMDGPU_kilp), - (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0)) ->; - -def KIL : Pat < - (int_AMDGPU_kill R600_Reg32:$src0), - (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0)) ->; - -// SGT Reverse args -def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), - (SGT R600_Reg32:$src1, R600_Reg32:$src0) ->; - -// SGE Reverse args -def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE R600_Reg32:$src1, R600_Reg32:$src0) ->; - -// SETGT_INT reverse args -def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), - (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) ->; - -// SETGE_INT reverse args -def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), - (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) ->; - -// SETGT_UINT reverse args -def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), - (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) ->; - -// SETGE_UINT reverse args -def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), - (SETGE_UINT R600_Reg32:$src0, R600_Reg32:$src1) ->; - -// The next two patterns are special cases for handling 'true if ordered' and -// 'true if unordered' conditionals. The assumption here is that the behavior of -// SETE and SNE conforms to the Direct3D 10 rules for floating point values -// described here: -// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit -// We assume that SETE returns false when one of the operands is NAN and -// SNE returns true when on of the operands is NAN - -//SETE - 'true if ordered' -def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), - (SETE R600_Reg32:$src0, R600_Reg32:$src1) ->; - -//SNE - 'true if unordered' -def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), - (SNE R600_Reg32:$src0, R600_Reg32:$src1) ->; - -def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; -def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; -def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; -def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; - -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>; - -def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; -def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; -def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; -def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; - -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>; - -def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; - -// bitconvert patterns - -def : BitConvert <i32, f32, R600_Reg32>; -def : BitConvert <f32, i32, R600_Reg32>; -def : BitConvert <v4f32, v4i32, R600_Reg128>; - -} // End isR600toCayman Predicate diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td deleted file mode 100644 index 3b62f0a7303..00000000000 --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td +++ /dev/null @@ -1,50 +0,0 @@ -//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// R600 Intrinsic Definitions -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "R600", isTarget = 1 in { - def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_R600_load_input_perspective : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_constant : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_linear : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_position : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_face : - Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; -} - -let TargetPrefix = "r600", isTarget = 1 in { - -class R600ReadPreloadRegisterIntrinsic<string name> - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<name>; - -multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> { - def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>; - def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>; - def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>; -} - -defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_global_size">; -defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_local_size">; -defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_ngroups">; -defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_tgid">; -defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_tidig">; -} // End TargetPrefix = "r600" diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td deleted file mode 100644 index 00877caf2b9..00000000000 --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td +++ /dev/null @@ -1,26 +0,0 @@ -//===-- R600Intrinsics.td - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "R600", isTarget = 1 in { - def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_R600_load_input_perspective : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_constant : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_linear : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_position : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_face : - Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; -} diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp deleted file mode 100644 index a31848efc99..00000000000 --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "R600MachineFunctionInfo.h" - -using namespace llvm; - -R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo(), - HasLinearInterpolation(false), - HasPerspectiveInterpolation(false) - { } - -unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const -{ - assert(HasPerspectiveInterpolation); - return 0; -} - -unsigned R600MachineFunctionInfo::GetIJLinearIndex() const -{ - assert(HasLinearInterpolation); - if (HasPerspectiveInterpolation) - return 1; - else - return 0; -} diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h deleted file mode 100644 index 68211b25813..00000000000 --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h +++ /dev/null @@ -1,38 +0,0 @@ -//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// R600MachineFunctionInfo is used for keeping track of which registers have -// been reserved by the llvm.AMDGPU.reserve.reg intrinsic. -// -//===----------------------------------------------------------------------===// - -#ifndef R600MACHINEFUNCTIONINFO_H -#define R600MACHINEFUNCTIONINFO_H - -#include "llvm/CodeGen/MachineFunction.h" -#include <vector> - -namespace llvm { - -class R600MachineFunctionInfo : public MachineFunctionInfo { - -public: - R600MachineFunctionInfo(const MachineFunction &MF); - std::vector<unsigned> ReservedRegs; - bool HasLinearInterpolation; - bool HasPerspectiveInterpolation; - - unsigned GetIJLinearIndex() const; - unsigned GetIJPerspectiveIndex() const; - -}; - -} // End llvm namespace - -#endif //R600MACHINEFUNCTIONINFO_H diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp deleted file mode 100644 index 4096cb050bf..00000000000 --- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The file contains the R600 implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#include "R600RegisterInfo.h" -#include "AMDGPUTargetMachine.h" -#include "R600MachineFunctionInfo.h" - -using namespace llvm; - -R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, - const TargetInstrInfo &tii) -: AMDGPURegisterInfo(tm, tii), - TM(tm), - TII(tii) - { } - -BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const -{ - BitVector Reserved(getNumRegs()); - const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); - - Reserved.set(AMDGPU::ZERO); - Reserved.set(AMDGPU::HALF); - Reserved.set(AMDGPU::ONE); - Reserved.set(AMDGPU::ONE_INT); - Reserved.set(AMDGPU::NEG_HALF); - Reserved.set(AMDGPU::NEG_ONE); - Reserved.set(AMDGPU::PV_X); - Reserved.set(AMDGPU::ALU_LITERAL_X); - Reserved.set(AMDGPU::PREDICATE_BIT); - Reserved.set(AMDGPU::PRED_SEL_OFF); - Reserved.set(AMDGPU::PRED_SEL_ZERO); - Reserved.set(AMDGPU::PRED_SEL_ONE); - - for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { - Reserved.set(*I); - } - - for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), - E = MFI->ReservedRegs.end(); I != E; ++I) { - Reserved.set(*I); - Reserved.set(*(getSuperRegisters(*I))); - } - - return Reserved; -} - -const TargetRegisterClass * -R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const -{ - switch (rc->getID()) { - case AMDGPU::GPRF32RegClassID: - case AMDGPU::GPRI32RegClassID: - return &AMDGPU::R600_Reg32RegClass; - default: return rc; - } -} - -unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const -{ - switch(reg) { - case AMDGPU::ZERO: return 248; - case AMDGPU::ONE: - case AMDGPU::NEG_ONE: return 249; - case AMDGPU::ONE_INT: return 250; - case AMDGPU::HALF: - case AMDGPU::NEG_HALF: return 252; - case AMDGPU::ALU_LITERAL_X: return 253; - case AMDGPU::PREDICATE_BIT: - case AMDGPU::PRED_SEL_OFF: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PRED_SEL_ONE: - return 0; - default: return getHWRegIndexGen(reg); - } -} - -unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const -{ - switch(reg) { - case AMDGPU::ZERO: - case AMDGPU::ONE: - case AMDGPU::ONE_INT: - case AMDGPU::NEG_ONE: - case AMDGPU::HALF: - case AMDGPU::NEG_HALF: - case AMDGPU::ALU_LITERAL_X: - case AMDGPU::PREDICATE_BIT: - case AMDGPU::PRED_SEL_OFF: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PRED_SEL_ONE: - return 0; - default: return getHWRegChanGen(reg); - } -} - -const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( - MVT VT) const -{ - switch(VT.SimpleTy) { - default: - case MVT::i32: return &AMDGPU::R600_TReg32RegClass; - } -} - -unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const -{ - switch (Channel) { - default: assert(!"Invalid channel index"); return 0; - case 0: return AMDGPU::sel_x; - case 1: return AMDGPU::sel_y; - case 2: return AMDGPU::sel_z; - case 3: return AMDGPU::sel_w; - } -} - -#include "R600HwRegInfo.include" diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h deleted file mode 100644 index 60f6d53b2d8..00000000000 --- a/src/gallium/drivers/radeon/R600RegisterInfo.h +++ /dev/null @@ -1,63 +0,0 @@ -//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Interface definition for R600RegisterInfo -// -//===----------------------------------------------------------------------===// - -#ifndef R600REGISTERINFO_H_ -#define R600REGISTERINFO_H_ - -#include "AMDGPUTargetMachine.h" -#include "AMDGPURegisterInfo.h" - -namespace llvm { - -class R600TargetMachine; -class TargetInstrInfo; - -struct R600RegisterInfo : public AMDGPURegisterInfo -{ - AMDGPUTargetMachine &TM; - const TargetInstrInfo &TII; - - R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); - - virtual BitVector getReservedRegs(const MachineFunction &MF) const; - - /// getISARegClass - rc is an AMDIL reg class. This function returns the - /// R600 reg class that is equivalent to the given AMDIL reg class. - virtual const TargetRegisterClass * getISARegClass( - const TargetRegisterClass * rc) const; - - /// getHWRegIndex - get the HW encoding for a register. - unsigned getHWRegIndex(unsigned reg) const; - - /// getHWRegChan - get the HW encoding for a register's channel. - unsigned getHWRegChan(unsigned reg) const; - - /// getCFGStructurizerRegClass - get the register class of the specified - /// type to use in the CFGStructurizer - virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; - - /// getSubRegFromChannel - Return the sub reg enum value for the given - /// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x) - unsigned getSubRegFromChannel(unsigned Channel) const; - -private: - /// getHWRegIndexGen - Generated function returns a register's encoding - unsigned getHWRegIndexGen(unsigned reg) const; - /// getHWRegChanGen - Generated function returns a register's channel - /// encoding. - unsigned getHWRegChanGen(unsigned reg) const; -}; - -} // End namespace llvm - -#endif // AMDIDSAREGISTERINFO_H_ diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td deleted file mode 100644 index 7ede181c51d..00000000000 --- a/src/gallium/drivers/radeon/R600Schedule.td +++ /dev/null @@ -1,36 +0,0 @@ -//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction -// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS -// slot has been removed. -// -//===----------------------------------------------------------------------===// - - -def ALU_X : FuncUnit; -def ALU_Y : FuncUnit; -def ALU_Z : FuncUnit; -def ALU_W : FuncUnit; -def TRANS : FuncUnit; - -def AnyALU : InstrItinClass; -def VecALU : InstrItinClass; -def TransALU : InstrItinClass; - -def R600_EG_Itin : ProcessorItineraries < - [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], - [], - [ - InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>, - InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>, - InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>, - InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> - ] ->; diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp deleted file mode 100644 index 1fc0a873eb6..00000000000 --- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp +++ /dev/null @@ -1,151 +0,0 @@ -//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass maps the pseudo interpolation registers to the correct physical -// registers. Prior to executing a fragment shader, the GPU loads interpolation -// parameters into physical registers. The specific physical register that each -// interpolation parameter ends up in depends on the type of the interpolation -// parameter as well as how many interpolation parameters are used by the -// shader. -// -//===----------------------------------------------------------------------===// - - - -#include "AMDGPU.h" -#include "AMDIL.h" -#include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -class SIAssignInterpRegsPass : public MachineFunctionPass { - -private: - static char ID; - TargetMachine &TM; - - void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, - unsigned physReg, unsigned virtReg); - -public: - SIAssignInterpRegsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "SI Assign intrpolation registers"; } -}; - -} // End anonymous namespace - -char SIAssignInterpRegsPass::ID = 0; - -#define INTERP_VALUES 16 -#define REQUIRED_VALUE_MAX_INDEX 7 - -struct InterpInfo { - bool Enabled; - unsigned Regs[3]; - unsigned RegCount; -}; - - -FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) { - return new SIAssignInterpRegsPass(tm); -} - -bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) -{ - - struct InterpInfo InterpUse[INTERP_VALUES] = { - {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2}, - {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2}, - {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2}, - {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3}, - {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2}, - {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2}, - {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2}, - {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1}, - {false, {AMDGPU::POS_X_FLOAT}, 1}, - {false, {AMDGPU::POS_Y_FLOAT}, 1}, - {false, {AMDGPU::POS_Z_FLOAT}, 1}, - {false, {AMDGPU::POS_W_FLOAT}, 1}, - {false, {AMDGPU::FRONT_FACE}, 1}, - {false, {AMDGPU::ANCILLARY}, 1}, - {false, {AMDGPU::SAMPLE_COVERAGE}, 1}, - {false, {AMDGPU::POS_FIXED_PT}, 1} - }; - - SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); - // This pass is only needed for pixel shaders. - if (MFI->ShaderType != ShaderType::PIXEL) { - return false; - } - MachineRegisterInfo &MRI = MF.getRegInfo(); - bool ForceEnable = true; - - // First pass, mark the interpolation values that are used. - for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { - for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; - RegIdx++) { - InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled || - !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]); - if (InterpUse[InterpIdx].Enabled && - InterpIdx <= REQUIRED_VALUE_MAX_INDEX) { - ForceEnable = false; - } - } - } - - // At least one interpolation mode must be enabled or else the GPU will hang. - if (ForceEnable) { - InterpUse[0].Enabled = true; - } - - unsigned UsedVgprs = 0; - - // Second pass, replace with VGPRs. - for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { - if (!InterpUse[InterpIdx].Enabled) { - continue; - } - MFI->SPIPSInputAddr |= (1 << InterpIdx); - - for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; - RegIdx++, UsedVgprs++) { - unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs); - unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); - MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg); - addLiveIn(&MF, MRI, NewReg, VirtReg); - } - } - - return false; -} - -void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF, - MachineRegisterInfo & MRI, - unsigned physReg, unsigned virtReg) -{ - const TargetInstrInfo * TII = TM.getInstrInfo(); - if (!MRI.isLiveIn(physReg)) { - MRI.addLiveIn(physReg, virtReg); - MF->front().addLiveIn(physReg); - BuildMI(MF->front(), MF->front().begin(), DebugLoc(), - TII->get(TargetOpcode::COPY), virtReg) - .addReg(physReg); - } else { - MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); - } -} diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl deleted file mode 100644 index 48bd5676eb0..00000000000 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ /dev/null @@ -1,291 +0,0 @@ -#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# -# -# This perl script prints to stdout .td code to be used as SIRegisterInfo.td -# it also generates a file called SIHwRegInfo.include, which contains helper -# functions for determining the hw encoding of registers. -# -#===------------------------------------------------------------------------===# - -use strict; -use warnings; - -my $SGPR_COUNT = 104; -my $VGPR_COUNT = 256; - -my $SGPR_MAX_IDX = $SGPR_COUNT - 1; -my $VGPR_MAX_IDX = $VGPR_COUNT - 1; - -my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : ''; - -print <<STRING; - -let Namespace = "AMDGPU" in { - def low : SubRegIndex; - def high : SubRegIndex; - - def sub0 : SubRegIndex; - def sub1 : SubRegIndex; - def sub2 : SubRegIndex; - def sub3 : SubRegIndex; - def sub4 : SubRegIndex; - def sub5 : SubRegIndex; - def sub6 : SubRegIndex; - def sub7 : SubRegIndex; -} - -class SIReg <string n> : Register<n> { - let Namespace = "AMDGPU"; -} - -class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { - let Namespace = "AMDGPU"; - let SubRegIndices = [low, high]; -} - -class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { - let Namespace = "AMDGPU"; - let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; -} - -class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { - let Namespace = "AMDGPU"; - let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7]; -} - -class SGPR_32 <bits<8> num, string name> : SIReg<name> { - field bits<8> Num; - - let Num = num; -} - - -class VGPR_32 <bits<9> num, string name> : SIReg<name> { - field bits<9> Num; - - let Num = num; -} - -class SGPR_64 <bits<8> num, string name, list<Register> subregs> : - SI_64 <name, subregs>; - -class VGPR_64 <bits<9> num, string name, list<Register> subregs> : - SI_64 <name, subregs>; - -class SGPR_128 <bits<8> num, string name, list<Register> subregs> : - SI_128 <name, subregs>; - -class VGPR_128 <bits<9> num, string name, list<Register> subregs> : - SI_128 <name, subregs>; - -class SGPR_256 <bits<8> num, string name, list<Register> subregs> : - SI_256 <name, subregs>; - -def VCC : SIReg<"VCC">; -def EXEC_LO : SIReg<"EXEC LO">; -def EXEC_HI : SIReg<"EXEC HI">; -def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>; -def SCC : SIReg<"SCC">; -def SREG_LIT_0 : SIReg <"S LIT 0">; -def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">; - -def M0 : SIReg <"M0">; - -//Interpolation registers - -def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; -def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; -def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; -def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">; -def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">; -def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">; -def PERSP_I_W : SIReg <"PERSP_I_W">; -def PERSP_J_W : SIReg <"PERSP_J_W">; -def PERSP_1_W : SIReg <"PERSP_1_W">; -def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">; -def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">; -def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">; -def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">; -def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">; -def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">; -def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">; -def POS_X_FLOAT : SIReg <"POS_X_FLOAT">; -def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">; -def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">; -def POS_W_FLOAT : SIReg <"POS_W_FLOAT">; -def FRONT_FACE : SIReg <"FRONT_FACE">; -def ANCILLARY : SIReg <"ANCILLARY">; -def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; -def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; - -STRING - -#32 bit register - -my @SGPR; -for (my $i = 0; $i < $SGPR_COUNT; $i++) { - print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n"; - $SGPR[$i] = "SGPR$i"; -} - -my @VGPR; -for (my $i = 0; $i < $VGPR_COUNT; $i++) { - print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n"; - $VGPR[$i] = "VGPR$i"; -} - -print <<STRING; - -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI) ->; - -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add (sequence "VGPR%u", 0, $VGPR_MAX_IDX), - PERSP_SAMPLE_I, PERSP_SAMPLE_J, - PERSP_CENTER_I, PERSP_CENTER_J, - PERSP_CENTROID_I, PERSP_CENTROID_J, - PERSP_I_W, PERSP_J_W, PERSP_1_W, - LINEAR_SAMPLE_I, LINEAR_SAMPLE_J, - LINEAR_CENTER_I, LINEAR_CENTER_J, - LINEAR_CENTROID_I, LINEAR_CENTROID_J, - LINE_STIPPLE_TEX_COORD, - POS_X_FLOAT, - POS_Y_FLOAT, - POS_Z_FLOAT, - POS_W_FLOAT, - FRONT_FACE, - ANCILLARY, - SAMPLE_COVERAGE, - POS_FIXED_PT - ) ->; - -def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add VReg_32, SReg_32) ->; - -def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; -def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; -def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; -def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; - - -STRING - -my @subregs_64 = ('low', 'high'); -my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w'); -my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7'); - -my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64')); -my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32')); -my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32')); - -my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64')); -my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32')); - - -my $sgpr64_list = join(',', @SGPR64); -my $vgpr64_list = join(',', @VGPR64); -print <<STRING; - -def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, - (add $sgpr64_list, $vgpr64_list) ->; - -STRING - -if ($INDEX_FILE ne '') { - open(my $fh, ">", $INDEX_FILE); - my %hw_values; - - for (my $i = 0; $i <= $#SGPR; $i++) { - push (@{$hw_values{$i}}, $SGPR[$i]); - } - - for (my $i = 0; $i <= $#SGPR64; $i++) { - push (@{$hw_values{$i * 2}}, $SGPR64[$i]) - } - - for (my $i = 0; $i <= $#SGPR128; $i++) { - push (@{$hw_values{$i * 4}}, $SGPR128[$i]); - } - - for (my $i = 0; $i <= $#SGPR256; $i++) { - push (@{$hw_values{$i * 8}}, $SGPR256[$i]); - } - - for (my $i = 0; $i <= $#VGPR; $i++) { - push (@{$hw_values{$i}}, $VGPR[$i]); - } - for (my $i = 0; $i <= $#VGPR64; $i++) { - push (@{$hw_values{$i * 2}}, $VGPR64[$i]); - } - - for (my $i = 0; $i <= $#VGPR128; $i++) { - push (@{$hw_values{$i * 4}}, $VGPR128[$i]); - } - - - print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n"; - for my $key (keys(%hw_values)) { - my @names = @{$hw_values{$key}}; - for my $regname (@names) { - print $fh " case AMDGPU::$regname:\n" - } - print $fh " return $key;\n"; - } - print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n" -} - - - - -sub print_sgpr_class { - my ($reg_width, $sub_reg_ref, @types) = @_; - return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types); -} - -sub print_vgpr_class { - my ($reg_width, $sub_reg_ref, @types) = @_; - return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types); -} - -sub print_reg_class { - my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_; - my @registers; - my $component_count = $reg_width / 32; - - for (my $i = 0; $i < $reg_count; $i += $component_count) { - my $reg_name = $reg_prefix . $i . '_' . $reg_width; - my @sub_regs; - for (my $idx = 0; $idx < $component_count; $idx++) { - my $sub_idx = $i + $idx; - push(@sub_regs, $reg_prefix . $sub_idx); - } - print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n"; - push (@registers, $reg_name); - } - - #Add VCC to SReg_64 - if ($class_prefix eq 'SReg' and $reg_width == 64) { - push (@registers, 'VCC') - } - - #Add EXEC to SReg_64 - if ($class_prefix eq 'SReg' and $reg_width == 64) { - push (@registers, 'EXEC') - } - - my $reg_list = join(', ', @registers); - - print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n"; - print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n"; - return @registers; -} diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp deleted file mode 100644 index 7c2739cf5b6..00000000000 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ /dev/null @@ -1,466 +0,0 @@ -//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is -// mostly EmitInstrWithCustomInserter(). -// -//===----------------------------------------------------------------------===// - -#include "SIISelLowering.h" -#include "AMDIL.h" -#include "AMDILIntrinsicInfo.h" -#include "SIInstrInfo.h" -#include "SIMachineFunctionInfo.h" -#include "SIRegisterInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" - -using namespace llvm; - -SITargetLowering::SITargetLowering(TargetMachine &TM) : - AMDGPUTargetLowering(TM), - TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) -{ - addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); - addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); - addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass); - addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); - addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); - - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); - - computeRegisterProperties(); - - setOperationAction(ISD::AND, MVT::i1, Custom); - - setOperationAction(ISD::ADD, MVT::i64, Legal); - setOperationAction(ISD::ADD, MVT::i32, Legal); - - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - - // We need to custom lower loads from the USER_SGPR address space, so we can - // add the SGPRs as livein registers. - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::i64, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setTargetDAGCombine(ISD::SELECT_CC); - - setTargetDAGCombine(ISD::SETCC); -} - -MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( - MachineInstr * MI, MachineBasicBlock * BB) const -{ - const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); - MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); - MachineBasicBlock::iterator I = MI; - - if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { - AppendS_WAITCNT(MI, *BB, llvm::next(I)); - return BB; - } - - switch (MI->getOpcode()) { - default: - return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - case AMDGPU::BRANCH: return BB; - case AMDGPU::CLAMP_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) - .addImm(0) // ABS - .addImm(1) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG - MI->eraseFromParent(); - break; - - case AMDGPU::FABS_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) - .addImm(1) // ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG - MI->eraseFromParent(); - break; - - case AMDGPU::FNEG_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) - .addImm(0) // ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(1); // NEG - MI->eraseFromParent(); - break; - case AMDGPU::SHADER_TYPE: - BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType = - MI->getOperand(0).getImm(); - MI->eraseFromParent(); - break; - - case AMDGPU::SI_INTERP: - LowerSI_INTERP(MI, *BB, I, MRI); - break; - case AMDGPU::SI_INTERP_CONST: - LowerSI_INTERP_CONST(MI, *BB, I, MRI); - break; - case AMDGPU::SI_KIL: - LowerSI_KIL(MI, *BB, I, MRI); - break; - case AMDGPU::SI_WQM: - LowerSI_WQM(MI, *BB, I, MRI); - break; - case AMDGPU::SI_V_CNDLT: - LowerSI_V_CNDLT(MI, *BB, I, MRI); - break; - } - return BB; -} - -void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const -{ - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); -} - - -void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const -{ - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC); - - MI->eraseFromParent(); -} - -void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const -{ - unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); - unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); - MachineOperand dst = MI->getOperand(0); - MachineOperand iReg = MI->getOperand(1); - MachineOperand jReg = MI->getOperand(2); - MachineOperand attr_chan = MI->getOperand(3); - MachineOperand attr = MI->getOperand(4); - MachineOperand params = MI->getOperand(5); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) - .addOperand(params); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) - .addOperand(iReg) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) - .addOperand(dst) - .addReg(tmp) - .addOperand(jReg) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - MI->eraseFromParent(); -} - -void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, - MachineBasicBlock &BB, MachineBasicBlock::iterator I, - MachineRegisterInfo &MRI) const -{ - MachineOperand dst = MI->getOperand(0); - MachineOperand attr_chan = MI->getOperand(1); - MachineOperand attr = MI->getOperand(2); - MachineOperand params = MI->getOperand(3); - unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) - .addOperand(params); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) - .addOperand(dst) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - MI->eraseFromParent(); -} - -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const -{ - // Clear this pixel from the exec mask if the operand is negative - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), - AMDGPU::VCC) - .addReg(AMDGPU::SREG_LIT_0) - .addOperand(MI->getOperand(0)); - - // If the exec mask is non-zero, skip the next two instructions - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); - - // Exec mask is zero: Export to NULL target... - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0); - - // ... and terminate wavefront - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM)); - - MI->eraseFromParent(); -} - -void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const -{ - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32), - AMDGPU::VCC) - .addReg(AMDGPU::SREG_LIT_0) - .addOperand(MI->getOperand(1)); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(2)) - .addReg(AMDGPU::VCC); - - MI->eraseFromParent(); -} - -EVT SITargetLowering::getSetCCResultType(EVT VT) const -{ - return MVT::i1; -} - -//===----------------------------------------------------------------------===// -// Custom DAG Lowering Operations -//===----------------------------------------------------------------------===// - -SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const -{ - switch (Op.getOpcode()) { - default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - EVT VT = Op.getValueType(); - switch (IntrinsicID) { - case AMDGPUIntrinsic::SI_vs_load_buffer_index: - return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, - AMDGPU::VGPR0, VT); - default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - } - break; - } - } - return SDValue(); -} - -/// Loweri1ContextSwitch - The function is for lowering i1 operations on the -/// VCC register. In the VALU context, VCC is a one bit register, but in the -/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only -/// the SALU can perform operations on the VCC register, we need to promote -/// the operand types from i1 to i64 in order for tablegen to be able to match -/// this operation to the correct SALU instruction. We do this promotion by -/// wrapping the operands in a CopyToReg node. -/// -SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, - SelectionDAG &DAG, - unsigned VCCNode) const -{ - DebugLoc DL = Op.getDebugLoc(); - - SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, - DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, - Op.getOperand(0)), - DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, - Op.getOperand(1))); - - return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); -} - -SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Chain = Op.getOperand(0); - SDValue CC = Op.getOperand(1); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue JumpT = Op.getOperand(4); - SDValue CmpValue; - SDValue Result; - CmpValue = DAG.getNode( - ISD::SETCC, - Op.getDebugLoc(), - MVT::i1, - LHS, RHS, - CC); - - Result = DAG.getNode( - AMDGPUISD::BRANCH_COND, - CmpValue.getDebugLoc(), - MVT::Other, Chain, - JumpT, CmpValue); - return Result; -} - -SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const -{ - EVT VT = Op.getValueType(); - LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op); - - assert(Ptr); - - unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace(); - - // We only need to lower USER_SGPR address space loads - if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) { - return SDValue(); - } - - // Loads from the USER_SGPR address space can only have constant value - // pointers. - ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr()); - assert(BasePtr); - - unsigned TypeDwordWidth = VT.getSizeInBits() / 32; - const TargetRegisterClass * dstClass; - switch (TypeDwordWidth) { - default: - assert(!"USER_SGPR value size not implemented"); - return SDValue(); - case 1: - dstClass = &AMDGPU::SReg_32RegClass; - break; - case 2: - dstClass = &AMDGPU::SReg_64RegClass; - break; - } - uint64_t Index = BasePtr->getZExtValue(); - assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned"); - unsigned SGPRIndex = Index / TypeDwordWidth; - unsigned Reg = dstClass->getRegister(SGPRIndex); - - DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg, - VT)); - return SDValue(); -} - -SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const -{ - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue True = Op.getOperand(2); - SDValue False = Op.getOperand(3); - SDValue CC = Op.getOperand(4); - EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); - - SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); -} - -//===----------------------------------------------------------------------===// -// Custom DAG optimizations -//===----------------------------------------------------------------------===// - -SDValue SITargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = N->getDebugLoc(); - EVT VT = N->getValueType(0); - - switch (N->getOpcode()) { - default: break; - case ISD::SELECT_CC: { - N->dump(); - ConstantSDNode *True, *False; - // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) - if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) - && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) - && True->isAllOnesValue() - && False->isNullValue() - && VT == MVT::i1) { - return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), - N->getOperand(1), N->getOperand(4)); - - } - break; - } - case ISD::SETCC: { - SDValue Arg0 = N->getOperand(0); - SDValue Arg1 = N->getOperand(1); - SDValue CC = N->getOperand(2); - ConstantSDNode * C = NULL; - ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); - - // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) - if (VT == MVT::i1 - && Arg0.getOpcode() == ISD::SIGN_EXTEND - && Arg0.getOperand(0).getValueType() == MVT::i1 - && (C = dyn_cast<ConstantSDNode>(Arg1)) - && C->isNullValue() - && CCOp == ISD::SETNE) { - return SimplifySetCC(VT, Arg0.getOperand(0), - DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); - } - break; - } - } - return SDValue(); -} - -#define NODE_NAME_CASE(node) case SIISD::node: return #node; - -const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const -{ - switch (Opcode) { - default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); - NODE_NAME_CASE(VCC_AND) - NODE_NAME_CASE(VCC_BITCAST) - } -} diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h deleted file mode 100644 index 4407bf04667..00000000000 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ /dev/null @@ -1,63 +0,0 @@ -//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// SI DAG Lowering interface definition -// -//===----------------------------------------------------------------------===// - -#ifndef SIISELLOWERING_H -#define SIISELLOWERING_H - -#include "AMDGPUISelLowering.h" -#include "SIInstrInfo.h" - -namespace llvm { - -class SITargetLowering : public AMDGPUTargetLowering -{ - const SIInstrInfo * TII; - - /// AppendS_WAITCNT - Memory reads and writes are syncronized using the - /// S_WAITCNT instruction. This function takes the most conservative - /// approach and inserts an S_WAITCNT instruction after every read and - /// write. - void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const; - void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, unsigned Opocde) const; - void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; - void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - - SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, - unsigned VCCNode) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - -public: - SITargetLowering(TargetMachine &tm); - virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, - MachineBasicBlock * BB) const; - virtual EVT getSetCCResultType(EVT VT) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual const char* getTargetNodeName(unsigned Opcode) const; -}; - -} // End namespace llvm - -#endif //SIISELLOWERING_H diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td deleted file mode 100644 index 8f56e21f5a6..00000000000 --- a/src/gallium/drivers/radeon/SIInstrFormats.td +++ /dev/null @@ -1,131 +0,0 @@ -//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// SI Instruction format definitions. -// -// Instructions with _32 take 32-bit operands. -// Instructions with _64 take 64-bit operands. -// -// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit -// encoding is the standard encoding, but instruction that make use of -// any of the instruction modifiers must use the 64-bit encoding. -// -// Instructions with _e32 use the 32-bit encoding. -// Instructions with _e64 use the 64-bit encoding. -// -//===----------------------------------------------------------------------===// - - -class VOP3_32 <bits<9> op, string opName, list<dag> pattern> - : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; - -class VOP3_64 <bits<9> op, string opName, list<dag> pattern> - : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; - - -class SOP1_32 <bits<8> op, string opName, list<dag> pattern> - : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>; - -class SOP1_64 <bits<8> op, string opName, list<dag> pattern> - : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>; - -class SOP2_32 <bits<7> op, string opName, list<dag> pattern> - : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; - -class SOP2_64 <bits<7> op, string opName, list<dag> pattern> - : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; - -class SOP2_VCC <bits<7> op, string opName, list<dag> pattern> - : SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; - -class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> : - VOP1 < - op, (outs vrc:$dst), (ins arc:$src0), opName, pattern - >; - -multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> { - def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>; - def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} - -multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> { - - def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>; - - def _e64 : VOP3_64 < - {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} - -class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> : - VOP2 < - op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern - >; - -multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> { - - def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>; - - def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} - -multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> { - def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>; - - def _e64 : VOP3_64 < - {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} - -class SOPK_32 <bits<5> op, string opName, list<dag> pattern> - : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>; - -class SOPK_64 <bits<5> op, string opName, list<dag> pattern> - : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>; - -class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> : - VOPC < - op, (ins arc:$src0, vrc:$src1), opName, pattern - >; - -multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> { - - def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>; - - def _e64 : VOP3_32 < - {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} - -multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> { - - def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>; - - def _e64 : VOP3_64 < - {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} - -class SOPC_32 <bits<7> op, string opName, list<dag> pattern> - : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; - -class SOPC_64 <bits<7> op, string opName, list<dag> pattern> - : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; - diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp deleted file mode 100644 index 42fef6ba468..00000000000 --- a/src/gallium/drivers/radeon/SIInstrInfo.cpp +++ /dev/null @@ -1,76 +0,0 @@ -//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// SI Implementation of TargetInstrInfo. -// -//===----------------------------------------------------------------------===// - - -#include "SIInstrInfo.h" -#include "AMDGPUTargetMachine.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/MC/MCInstrDesc.h" - -#include <stdio.h> - -using namespace llvm; - -SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) - : AMDGPUInstrInfo(tm), - RI(tm, *this), - TM(tm) - { } - -const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const -{ - return RI; -} - -void -SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const -{ - - // If we are trying to copy to or from SCC, there is a bug somewhere else in - // the backend. While it may be theoretically possible to do this, it should - // never be necessary. - assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); - - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); -} - -MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, - int64_t Imm) const -{ - MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc()); - MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); - MachineInstrBuilder(MI).addImm(Imm); - - return MI; - -} - -bool SIInstrInfo::isMov(unsigned Opcode) const -{ - switch(Opcode) { - default: return false; - case AMDGPU::S_MOV_B32: - case AMDGPU::S_MOV_B64: - case AMDGPU::V_MOV_B32_e32: - case AMDGPU::V_MOV_B32_e64: - case AMDGPU::V_MOV_IMM_F32: - case AMDGPU::V_MOV_IMM_I32: - case AMDGPU::S_MOV_IMM_I32: - return true; - } -} diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h deleted file mode 100644 index 5915a00f779..00000000000 --- a/src/gallium/drivers/radeon/SIInstrInfo.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Interface definition for SIInstrInfo. -// -//===----------------------------------------------------------------------===// - - -#ifndef SIINSTRINFO_H -#define SIINSTRINFO_H - -#include "AMDGPUInstrInfo.h" -#include "SIRegisterInfo.h" - -namespace llvm { - -class SIInstrInfo : public AMDGPUInstrInfo { -private: - const SIRegisterInfo RI; - AMDGPUTargetMachine &TM; - -public: - explicit SIInstrInfo(AMDGPUTargetMachine &tm); - - const SIRegisterInfo &getRegisterInfo() const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - /// getEncodingType - Returns the encoding type of this instruction. - unsigned getEncodingType(const MachineInstr &MI) const; - - /// getEncodingBytes - Returns the size of this instructions encoding in - /// number of bytes. - unsigned getEncodingBytes(const MachineInstr &MI) const; - - virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, - int64_t Imm) const; - - virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;} - virtual bool isMov(unsigned Opcode) const; - - }; - -} // End namespace llvm - -namespace SIInstrFlags { - enum Flags { - // First 4 bits are the instruction encoding - NEED_WAIT = 1 << 4 - }; -} - -#endif //SIINSTRINFO_H diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td deleted file mode 100644 index 49ef342a154..00000000000 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ /dev/null @@ -1,506 +0,0 @@ -//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// SI DAG Profiles -//===----------------------------------------------------------------------===// -def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ - SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> -]>; - -//===----------------------------------------------------------------------===// -// SI DAG Nodes -//===----------------------------------------------------------------------===// - -// and operation on 64-bit wide vcc -def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, - [SDNPCommutative, SDNPAssociative] ->; - -// Special bitcast node for sharing VCC register between VALU and SALU -def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", - SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> ->; - -class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : - AMDGPUInst<outs, ins, asm, pattern> { - - field bits<4> EncodingType = 0; - field bits<1> NeedWait = 0; - - let TSFlags{3-0} = EncodingType; - let TSFlags{4} = NeedWait; - -} - -class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { - - field bits<32> Inst; -} - -class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { - - field bits<64> Inst; -} - -class SIOperand <ValueType vt, dag opInfo>: Operand <vt> { - let EncoderMethod = "encodeOperand"; - let MIOperandInfo = opInfo; -} - -def IMM16bit : ImmLeaf < - i16, - [{return isInt<16>(Imm);}] ->; - -def IMM8bit : ImmLeaf < - i32, - [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] ->; - -def IMM12bit : ImmLeaf < - i16, - [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] ->; - -def IMM32bitIn64bit : ImmLeaf < - i64, - [{return isInt<32>(Imm);}] ->; - -class GPR4Align <RegisterClass rc> : Operand <vAny> { - let EncoderMethod = "GPR4AlignEncode"; - let MIOperandInfo = (ops rc:$reg); -} - -class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> { - let EncoderMethod = "GPR2AlignEncode"; - let MIOperandInfo = (ops rc:$reg); -} - -def SMRDmemrr : Operand<iPTR> { - let MIOperandInfo = (ops SReg_64, SReg_32); - let EncoderMethod = "GPR2AlignEncode"; -} - -def SMRDmemri : Operand<iPTR> { - let MIOperandInfo = (ops SReg_64, i32imm); - let EncoderMethod = "SMRDmemriEncode"; -} - -def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>; -def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>; - -let Uses = [EXEC] in { -def EXP : Enc64< - (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), - "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - bits<4> EN; - bits<6> TGT; - bits<1> COMPR; - bits<1> DONE; - bits<1> VM; - bits<8> VSRC0; - bits<8> VSRC1; - bits<8> VSRC2; - bits<8> VSRC3; - - let Inst{3-0} = EN; - let Inst{9-4} = TGT; - let Inst{10} = COMPR; - let Inst{11} = DONE; - let Inst{12} = VM; - let Inst{31-26} = 0x3e; - let Inst{39-32} = VSRC0; - let Inst{47-40} = VSRC1; - let Inst{55-48} = VSRC2; - let Inst{63-56} = VSRC3; - let EncodingType = 0; //SIInstrEncodingType::EXP - - let NeedWait = 1; - let usesCustomInserter = 1; -} - -class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { - - bits<8> VDATA; - bits<4> DMASK; - bits<1> UNORM; - bits<1> GLC; - bits<1> DA; - bits<1> R128; - bits<1> TFE; - bits<1> LWE; - bits<1> SLC; - bits<8> VADDR; - bits<5> SRSRC; - bits<5> SSAMP; - - let Inst{11-8} = DMASK; - let Inst{12} = UNORM; - let Inst{13} = GLC; - let Inst{14} = DA; - let Inst{15} = R128; - let Inst{16} = TFE; - let Inst{17} = LWE; - let Inst{24-18} = op; - let Inst{25} = SLC; - let Inst{31-26} = 0x3c; - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{57-53} = SSAMP; - - let EncodingType = 2; //SIInstrEncodingType::MIMG - - let NeedWait = 1; - let usesCustomInserter = 1; -} - -class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64<outs, ins, asm, pattern> { - - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<4> DFMT; - bits<3> NFMT; - bits<8> VADDR; - bits<5> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{18-16} = op; - let Inst{22-19} = DFMT; - let Inst{25-23} = NFMT; - let Inst{31-26} = 0x3a; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; - let EncodingType = 3; //SIInstrEncodingType::MTBUF - - let NeedWait = 1; - let usesCustomInserter = 1; - let neverHasSideEffects = 1; -} - -class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64<outs, ins, asm, pattern> { - - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<1> LDS; - bits<8> VADDR; - bits<5> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{16} = LDS; - let Inst{24-18} = op; - let Inst{31-26} = 0x38; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; - let EncodingType = 4; //SIInstrEncodingType::MUBUF - - let NeedWait = 1; - let usesCustomInserter = 1; - let neverHasSideEffects = 1; -} -} // End Uses = [EXEC] - -class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32<outs, ins, asm, pattern> { - - bits<7> SDST; - bits<15> PTR; - bits<8> OFFSET = PTR{7-0}; - bits<1> IMM = PTR{8}; - bits<6> SBASE = PTR{14-9}; - - let Inst{7-0} = OFFSET; - let Inst{8} = IMM; - let Inst{14-9} = SBASE; - let Inst{21-15} = SDST; - let Inst{26-22} = op; - let Inst{31-27} = 0x18; //encoding - let EncodingType = 5; //SIInstrEncodingType::SMRD - - let NeedWait = 1; - let usesCustomInserter = 1; -} - -class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32<outs, ins, asm, pattern> { - - bits<7> SDST; - bits<8> SSRC0; - - let Inst{7-0} = SSRC0; - let Inst{15-8} = op; - let Inst{22-16} = SDST; - let Inst{31-23} = 0x17d; //encoding; - let EncodingType = 6; //SIInstrEncodingType::SOP1 -} - -class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<7> SDST; - bits<8> SSRC0; - bits<8> SSRC1; - - let Inst{7-0} = SSRC0; - let Inst{15-8} = SSRC1; - let Inst{22-16} = SDST; - let Inst{29-23} = op; - let Inst{31-30} = 0x2; // encoding - let EncodingType = 7; // SIInstrEncodingType::SOP2 -} - -class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32<outs, ins, asm, pattern> { - - bits<8> SSRC0; - bits<8> SSRC1; - - let Inst{7-0} = SSRC0; - let Inst{15-8} = SSRC1; - let Inst{22-16} = op; - let Inst{31-23} = 0x17e; - let EncodingType = 8; // SIInstrEncodingType::SOPC - - let DisableEncoding = "$dst"; -} - -class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins , asm, pattern> { - - bits <7> SDST; - bits <16> SIMM16; - - let Inst{15-0} = SIMM16; - let Inst{22-16} = SDST; - let Inst{27-23} = op; - let Inst{31-28} = 0xb; //encoding - let EncodingType = 9; // SIInstrEncodingType::SOPK -} - -class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < - (outs), - ins, - asm, - pattern > { - - bits <16> SIMM16; - - let Inst{15-0} = SIMM16; - let Inst{22-16} = op; - let Inst{31-23} = 0x17f; // encoding - let EncodingType = 10; // SIInstrEncodingType::SOPP -} - - -let Uses = [EXEC] in { -class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<8> VSRC; - bits<2> ATTRCHAN; - bits<6> ATTR; - - let Inst{7-0} = VSRC; - let Inst{9-8} = ATTRCHAN; - let Inst{15-10} = ATTR; - let Inst{17-16} = op; - let Inst{25-18} = VDST; - let Inst{31-26} = 0x32; // encoding - let EncodingType = 11; // SIInstrEncodingType::VINTRP - - let neverHasSideEffects = 1; -} - -class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - - let Inst{8-0} = SRC0; - let Inst{16-9} = op; - let Inst{24-17} = VDST; - let Inst{31-25} = 0x3f; //encoding - - let EncodingType = 12; // SIInstrEncodingType::VOP1 - let PostEncoderMethod = "VOPPostEncode"; -} - -class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - bits<8> VSRC1; - - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = VDST; - let Inst{30-25} = op; - let Inst{31} = 0x0; //encoding - - let EncodingType = 13; // SIInstrEncodingType::VOP2 - let PostEncoderMethod = "VOPPostEncode"; -} - -class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - bits<9> SRC1; - bits<9> SRC2; - bits<3> ABS; - bits<1> CLAMP; - bits<2> OMOD; - bits<3> NEG; - - let Inst{7-0} = VDST; - let Inst{10-8} = ABS; - let Inst{11} = CLAMP; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = SRC0; - let Inst{49-41} = SRC1; - let Inst{58-50} = SRC2; - let Inst{60-59} = OMOD; - let Inst{63-61} = NEG; - - let EncodingType = 14; // SIInstrEncodingType::VOP3 - let PostEncoderMethod = "VOPPostEncode"; -} - -class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : - Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { - - bits<9> SRC0; - bits<8> VSRC1; - - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = op; - let Inst{31-25} = 0x3e; - - let EncodingType = 15; //SIInstrEncodingType::VOPC - let PostEncoderMethod = "VOPPostEncode"; - let DisableEncoding = "$dst"; -} -} // End Uses = [EXEC] - -class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < - op, - (outs VReg_128:$vdata), - (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr, - GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp), - asm, - [] ->; - -class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF < - op, - (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc, - i1imm:$tfe, SReg_32:$soffset), - asm, - []> { - let mayLoad = 1; -} - -class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < - op, - (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, - i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), - asm, - []> { - let mayLoad = 1; -} - -class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < - op, - (outs), - (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), - asm, - []> { - let mayStore = 1; -} - -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass, - ValueType vt> { - def _IMM : SMRD < - op, - (outs dstClass:$dst), - (ins SMRDmemri:$src0), - asm, - [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] - >; - - def _SGPR : SMRD < - op, - (outs dstClass:$dst), - (ins SMRDmemrr:$src0), - asm, - [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] - >; -} - -multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> { - defm _F32 : SMRD_Helper <op, asm, dstClass, f32>; - defm _I32 : SMRD_Helper <op, asm, dstClass, i32>; -} - -include "SIInstrFormats.td" -include "SIInstructions.td" diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td deleted file mode 100644 index f9bdc63e3e5..00000000000 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ /dev/null @@ -1,1180 +0,0 @@ -//===-- SIInstructions.td - SI Instruction Defintions ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -def isSI : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; - -let Predicates = [isSI] in { - -let neverHasSideEffects = 1 in { -def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>; -def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>; -def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; -def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; -def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; -def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; -def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; -def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; -def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; -def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; -} // End neverHasSideEffects = 1 -////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; -////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; -////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; -////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>; -////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; -////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; -////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; -////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; -//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>; -//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; -def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; -//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; -//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>; -//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>; -////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; -////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; -////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; -////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>; -def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>; -def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>; -def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>; -def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>; -def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>; -def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>; -def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>; -////def S_ANDN2_SAVEEXEC_B64 : SOP1_ANDN2 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>; -////def S_ORN2_SAVEEXEC_B64 : SOP1_ORN2 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>; -def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>; -def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>; -def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>; -def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>; -def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>; -def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>; -def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>; -def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>; -def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>; -//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>; -def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>; -def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>; -def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>; -def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>; -def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>; - -/* -This instruction is disabled for now until we can figure out how to teach -the instruction selector to correctly use the S_CMP* vs V_CMP* -instructions. - -When this instruction is enabled the code generator sometimes produces this -invalid sequence: - -SCC = S_CMPK_EQ_I32 SGPR0, imm -VCC = COPY SCC -VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 - -def S_CMPK_EQ_I32 : SOPK < - 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1), - "S_CMPK_EQ_I32", - [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))] ->; -*/ - -def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; -def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; -def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; -def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>; -def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>; -def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>; -def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>; -def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; -def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; -def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; -def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; -def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; -def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; -//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; -def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>; -def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>; -def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; -//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; -//def EXP : EXP_ <0x00000000, "EXP", []>; - -defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; -defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT))] ->; -defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ))] ->; -defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE))] ->; -defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT))] ->; -defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))] ->; -defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE))] ->; -defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; -defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>; -defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>; -defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>; -defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; -defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; -defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", - [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))] ->; -defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; -defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; -defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>; -defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>; -defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>; -defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>; -defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>; -defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>; -defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>; -defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>; -defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>; -defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>; -defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>; -defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>; -defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>; -defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>; -defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>; -defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>; -defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>; -defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>; -defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>; -defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>; -defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>; -defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>; -defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>; -defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>; -defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>; -defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>; -defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>; -defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>; -defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>; -defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>; -defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>; -defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>; -defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>; -defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>; -defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>; -defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>; -defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>; -defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>; -defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>; -defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>; -defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>; -defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>; -defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>; -defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>; -defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>; -defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>; -defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>; -defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>; -defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>; -defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>; -defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>; -defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>; -defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>; -defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>; -defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>; -defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>; -defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>; -defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>; -defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>; -defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>; -defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>; -defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>; -defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>; -defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>; -defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>; -defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>; -defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>; -defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>; -defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>; -defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>; -defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>; -defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>; -defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>; -defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>; -defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>; -defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>; -defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>; -defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>; -defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>; -defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>; -defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>; -defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>; -defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>; -defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>; -defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>; -defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>; -defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>; -defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>; -defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>; -defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>; -defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>; -defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>; -defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>; -defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>; -defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>; -defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>; -defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>; -defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>; -defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>; -defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>; -defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>; -defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>; -defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>; -defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>; -defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>; -defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>; -defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>; -defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>; -defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>; -defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>; -defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>; -defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; -defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; -defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", - [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLT))] ->; -defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", - [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETEQ))] ->; -defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", - [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLE))] ->; -defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", - [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGT))] ->; -defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", - [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETNE))] ->; -defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", - [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGE))] ->; -defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; -defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>; -defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>; -defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>; -defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>; -defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>; -defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>; -defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>; -defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>; -defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>; -defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>; -defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>; -defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>; -defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>; -defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>; -defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>; -defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>; -defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>; -defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>; -defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>; -defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>; -defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>; -defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>; -defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>; -defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>; -defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>; -defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>; -defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>; -defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>; -defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>; -defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>; -defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>; -defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>; -defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>; -defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>; -defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>; -defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>; -defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>; -defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>; -defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>; -defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>; -defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>; -defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>; -defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>; -defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>; -defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>; -defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>; -defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>; -defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>; -defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>; -defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>; -defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>; -defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>; -defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>; -defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>; -defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>; -defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>; -defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>; -defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>; -defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>; -defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>; -//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; -//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; -//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; -def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>; -//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>; -//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>; -//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; -//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; -//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>; -//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; -//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; -//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; -//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; -//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; -//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; -//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; -//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; -//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; -//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; -//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>; -//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>; -//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>; -//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>; -//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>; -//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>; -//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>; -//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>; -//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>; -//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>; -//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>; -//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>; -//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>; -//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>; -//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>; -//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>; -//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>; -//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>; -//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>; -//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>; -//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>; -//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>; -//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>; -//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>; -//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>; -//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>; -//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>; -//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>; -//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>; -//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>; -//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>; -//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>; -//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>; -//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>; -//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>; -//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; -//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; -def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; - -defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>; - -//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; -//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; -//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; - -//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; -//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; -//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; -//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>; -//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; -//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; -//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; -//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>; -//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>; -//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; -//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; -//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>; -//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; -//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; -//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; -//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>; -//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>; -//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>; -//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>; -//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>; -//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>; -//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>; -//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>; -//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>; -//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>; -//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>; -//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; -//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; -//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; -//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -//def IMAGE_SAMPLE_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_D", 0x00000022>; -//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -//def IMAGE_SAMPLE_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_L", 0x00000024>; -//def IMAGE_SAMPLE_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_B", 0x00000025>; -//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; -//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>; -//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; -//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; -//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>; -//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>; -//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; -//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; -//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; -//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>; -//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>; -//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>; -//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>; -//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>; -//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>; -//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>; -//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>; -//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>; -//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>; -//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>; -//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>; -//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; -//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; -//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; -//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; -//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; -//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; -//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; -//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>; -//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>; -//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>; -//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>; -//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>; -//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>; -//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; -//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; - -let neverHasSideEffects = 1 in { -defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; -} // End neverHasSideEffects -defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; -//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; -//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; -defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", - [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))] ->; -//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; -//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; -defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", - [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] ->; -defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; -////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; -//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; -//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; -//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; -//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; -//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>; -//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>; -//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; -//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; -//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; -//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; -//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; -//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; -defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", - [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))] ->; -defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; -defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; -defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", - [(set VReg_32:$dst, (frint AllReg_32:$src0))] ->; -defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", - [(set VReg_32:$dst, (ffloor AllReg_32:$src0))] ->; -defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", - [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] ->; -defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; -defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>; -defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; -defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; -defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set VReg_32:$dst, (int_AMDGPU_rcp AllReg_32:$src0))] ->; -defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; -defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; -defm V_RSQ_LEGACY_F32 : VOP1_32 < - 0x0000002d, "V_RSQ_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))] ->; -defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; -defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; -defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; -defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; -defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; -defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; -defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; -defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; -defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; -defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; -defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>; -defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>; -defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>; -defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>; -//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>; -defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>; -defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>; -//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>; -defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>; -//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>; -defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>; -defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>; -defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; - -def V_INTERP_P1_F32 : VINTRP < - 0x00000000, - (outs VReg_32:$dst), - (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P1_F32", - []> { - let DisableEncoding = "$m0"; -} - -def V_INTERP_P2_F32 : VINTRP < - 0x00000001, - (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P2_F32", - []> { - - let Constraints = "$src0 = $dst"; - let DisableEncoding = "$src0,$m0"; - -} - -def V_INTERP_MOV_F32 : VINTRP < - 0x00000002, - (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_MOV_F32", - []> { - let VSRC = 0; - let DisableEncoding = "$m0"; -} - -//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; - -let isTerminator = 1 in { - -def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", - [(IL_retflag)]> { - let SIMM16 = 0; - let isBarrier = 1; - let hasCtrlDep = 1; -} - -let isBranch = 1 in { -def S_BRANCH : SOPP < - 0x00000002, (ins brtarget:$target), "S_BRANCH", - [] ->; - -let DisableEncoding = "$scc" in { -def S_CBRANCH_SCC0 : SOPP < - 0x00000004, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC0", [] ->; -def S_CBRANCH_SCC1 : SOPP < - 0x00000005, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC1", - [] ->; -} // End DisableEncoding = "$scc" - -def S_CBRANCH_VCCZ : SOPP < - 0x00000006, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCZ", - [] ->; -def S_CBRANCH_VCCNZ : SOPP < - 0x00000007, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCNZ", - [] ->; - -let DisableEncoding = "$exec" in { -def S_CBRANCH_EXECZ : SOPP < - 0x00000008, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECZ", - [] ->; -def S_CBRANCH_EXECNZ : SOPP < - 0x00000009, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECNZ", - [] ->; -} // End DisableEncoding = "$exec" - - -} // End isBranch = 1 -} // End isTerminator = 1 - -//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>; -def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", - [] ->; -//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; -//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; -//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; -//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; -//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; -//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; -//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; -//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; -//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; -//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; - -/* XXX: No VOP3 version of this instruction yet */ -def V_CNDMASK_B32 : VOP2 <0x00000000, (outs VReg_32:$dst), - (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32", - [(set (i32 VReg_32:$dst), - (select VCCReg:$vcc, VReg_32:$src1, AllReg_32:$src0))] > { - - let DisableEncoding = "$vcc"; -} - -//f32 pattern for V_CNDMASK_B32 -def : Pat < - (f32 (select VCCReg:$vcc, VReg_32:$src0, AllReg_32:$src1)), - (V_CNDMASK_B32 AllReg_32:$src1, VReg_32:$src0, VCCReg:$vcc) ->; - -defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; -defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; - -defm V_ADD_F32 : VOP2_32 < - 0x00000003, "V_ADD_F32", - [(set VReg_32:$dst, (fadd AllReg_32:$src0, VReg_32:$src1))] ->; - -defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", - [(set VReg_32:$dst, (fsub AllReg_32:$src0, VReg_32:$src1))] ->; -defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; -defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; -defm V_MUL_LEGACY_F32 : VOP2_32 < - 0x00000007, "V_MUL_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))] ->; - -defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", - [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))] ->; -//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; -//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; -//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; -//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; -defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))] ->; - -defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))] ->; -defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; -defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; -defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; -defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; -defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; -defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", - [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))] ->; -defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", - [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))] ->; -defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", - [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))] ->; -defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; -defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; -defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; -defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; -//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; -//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; -//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; -let Defs = [VCC] in { // Carry-out goes to VCC -defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", - [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] ->; -defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", - [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] ->; -} // End Defs = [VCC] -defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; -defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>; -defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>; -defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; -////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; -////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; -////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; -defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", - [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] ->; -////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; -////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; -def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; -def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>; -def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>; -def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>; -def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>; -def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>; -def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>; -def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>; -def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>; -def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>; -def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>; -def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>; -////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>; -////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>; -////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>; -////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; -//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; - -let neverHasSideEffects = 1 in { - -def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; -def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; -//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>; -//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>; - -} // End neverHasSideEffects -def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; -def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; -def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; -def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; -def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; -def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; -def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; -def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; -def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; -//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; -def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; -def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; -def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; -////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; -////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; -////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; -////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>; -////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>; -////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>; -////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>; -////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>; -////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>; -//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>; -//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>; -//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; -def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; -////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; -def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>; -def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>; -def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>; -def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; -def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; -def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; -def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; -def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; -def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; -def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; -def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; -def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; -def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; -//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; -//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; -//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; -def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; -def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; -def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>; -def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>; -def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>; -def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>; -def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; -def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>; -def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>; -def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; - -def S_CSELECT_B32 : SOP2 < - 0x0000000a, (outs SReg_32:$dst), - (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", - [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] ->; - -def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; - -// f32 pattern for S_CSELECT_B32 -def : Pat < - (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), - (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) ->; - -def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; - -def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] ->; -def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", - [(set VCCReg:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] ->; -def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; -def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; -def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; -def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; -////def S_ANDN2_B32 : SOP2_ANDN2 <0x00000014, "S_ANDN2_B32", []>; -////def S_ANDN2_B64 : SOP2_ANDN2 <0x00000015, "S_ANDN2_B64", []>; -////def S_ORN2_B32 : SOP2_ORN2 <0x00000016, "S_ORN2_B32", []>; -////def S_ORN2_B64 : SOP2_ORN2 <0x00000017, "S_ORN2_B64", []>; -def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>; -def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>; -def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; -def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>; -def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; -def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; -def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>; -def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>; -def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>; -def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>; -def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>; -def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>; -def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; -def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; -def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; -def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>; -def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>; -def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>; -def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; -//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; -def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; - -class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI < - (outs VReg_32:$dst), - (ins immType:$src0), - "V_MOV_IMM", - [(set VReg_32:$dst, (immNode:$src0))] ->; - -let isCodeGenOnly = 1, isPseudo = 1 in { - -def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>; -def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>; - -def S_MOV_IMM_I32 : InstSI < - (outs SReg_32:$dst), - (ins i32imm:$src0), - "S_MOV_IMM_I32", - [(set SReg_32:$dst, (imm:$src0))] ->; - -// i64 immediates aren't really supported in hardware, but LLVM will use the i64 -// type for indices on load and store instructions. The pattern for -// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits, -// which the hardware can handle. -def S_MOV_IMM_I64 : InstSI < - (outs SReg_64:$dst), - (ins i64imm:$src0), - "S_MOV_IMM_I64 $dst, $src0", - [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))] ->; - -} // End isCodeGenOnly, isPseudo = 1 - -class SI_LOAD_LITERAL<Operand ImmType> : - Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { - - bits<32> imm; - let Inst{31-0} = imm; -} - -def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>; -def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>; - -let isCodeGenOnly = 1, isPseudo = 1 in { - -def SET_M0 : InstSI < - (outs SReg_32:$dst), - (ins i32imm:$src0), - "SET_M0", - [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))] ->; - -def CONFIG_WRITE : InstSI < - (outs i32imm:$reg), - (ins i32imm:$val), - "CONFIG_WRITE $reg, $val", - [] > { - field bits<32> Inst = 0; -} - -def LOAD_CONST : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "LOAD_CONST $dst, $src", - [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] ->; - -let usesCustomInserter = 1 in { - -def SI_V_CNDLT : InstSI < - (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), - "SI_V_CNDLT $dst, $src0, $src1, $src2", - [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))] ->; - -def SI_INTERP : InstSI < - (outs VReg_32:$dst), - (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), - "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params", - [] ->; - -def SI_INTERP_CONST : InstSI < - (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), - "SI_INTERP_CONST $dst, $attr_chan, $attr, $params", - [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan, - imm:$attr, SReg_32:$params))] ->; - -def SI_KIL : InstSI < - (outs), - (ins VReg_32:$src), - "SI_KIL $src", - [(int_AMDGPU_kill VReg_32:$src)] ->; - -def SI_WQM : InstSI < - (outs), - (ins), - "SI_WQM", - [(int_SI_wqm)] ->; - -} // end usesCustomInserter - -// SI Psuedo branch instructions. These are used by the CFG structurizer pass -// and should be lowered to ISA instructions prior to codegen. - -let isBranch = 1, isTerminator = 1 in { -def SI_IF_NZ : InstSI < - (outs), - (ins brtarget:$target, VCCReg:$vcc), - "SI_BRANCH_NZ", - [(IL_brcond bb:$target, VCCReg:$vcc)] ->; - -def SI_IF_Z : InstSI < - (outs), - (ins brtarget:$target, VCCReg:$vcc), - "SI_BRANCH_Z", - [] ->; -} // end isBranch = 1, isTerminator = 1 -} // end IsCodeGenOnly, isPseudo - -/* int_SI_vs_load_input */ -def : Pat< - (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, - VReg_32:$buf_idx_vgpr), - (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, - VReg_32:$buf_idx_vgpr, SReg_128:$tlst, - 0, 0, (i32 SREG_LIT_0)) ->; - -/* int_SI_export */ -def : Pat < - (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), - (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) ->; - -/* int_SI_sample */ -def : Pat < - (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) ->; - -def CLAMP_SI : CLAMP<VReg_32>; -def FABS_SI : FABS<VReg_32>; -def FNEG_SI : FNEG<VReg_32>; - -def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>; - -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>; - -def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, SReg_128, i32, SReg_32>; - -def : BitConvert <i32, f32, SReg_32>; -def : BitConvert <i32, f32, VReg_32>; - -def : BitConvert <f32, i32, SReg_32>; -def : BitConvert <f32, i32, VReg_32>; - -def : Pat < - (i64 (SIvcc_bitcast VCCReg:$vcc)), - (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64)) ->; - -def : Pat < - (i1 (SIvcc_bitcast SReg_64:$vcc)), - (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) ->; - -/********** ===================== **********/ -/********** Interpolation Paterns **********/ -/********** ===================== **********/ - -def : Pat < - (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_read_face), - (f32 FRONT_FACE) ->; - -def : Pat < - (int_SI_fs_read_pos 0), - (f32 POS_X_FLOAT) ->; - -def : Pat < - (int_SI_fs_read_pos 1), - (f32 POS_Y_FLOAT) ->; - -def : Pat < - (int_SI_fs_read_pos 2), - (f32 POS_Z_FLOAT) ->; - -def : Pat < - (int_SI_fs_read_pos 3), - (f32 POS_W_FLOAT) ->; - -/********** ================== **********/ -/********** Intrinsic Patterns **********/ -/********** ================== **********/ - -/* llvm.AMDGPU.pow */ -/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */ -def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>; - -def : Pat < - (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1), - (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1)) ->; - -/********** ================== **********/ -/********** VOP3 Patterns **********/ -/********** ================== **********/ - -def : Pat <(f32 (IL_mad AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2)), - (V_MAD_LEGACY_F32 AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, - 0, 0, 0, 0)>; - -} // End isSI predicate diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td deleted file mode 100644 index b3ecba3e2d0..00000000000 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ /dev/null @@ -1,40 +0,0 @@ -//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// SI Intrinsic Definitions -// -//===----------------------------------------------------------------------===// - - -let TargetPrefix = "SI", isTarget = 1 in { - - def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - /* XXX: We may need a seperate intrinsic here for loading integer values */ - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; - def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; - def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ; - def int_SI_wqm : Intrinsic <[], [], []>; - - def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>; - - /* Interpolation Intrinsics */ - - def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>; - class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; - - def int_SI_fs_interp_linear_center : Interp; - def int_SI_fs_interp_linear_centroid : Interp; - def int_SI_fs_interp_persp_center : Interp; - def int_SI_fs_interp_persp_centroid : Interp; - def int_SI_fs_interp_constant : Interp; - - def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>; - def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; -} diff --git a/src/gallium/drivers/radeon/SILowerFlowControl.cpp b/src/gallium/drivers/radeon/SILowerFlowControl.cpp deleted file mode 100644 index bf5192efe3d..00000000000 --- a/src/gallium/drivers/radeon/SILowerFlowControl.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF) -// to predicated instructions. -// -// All flow control (except loops) is handled using predicated instructions and -// a predicate stack. Each Scalar ALU controls the operations of 64 Vector -// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs -// by writting to the 64-bit EXEC register (each bit corresponds to a -// single vector ALU). Typically, for predicates, a vector ALU will write -// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each -// Vector ALU) and then the ScalarALU will AND the VCC register with the -// EXEC to update the predicates. -// -// For example: -// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 -// SI_IF_NZ %VCC -// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 -// ELSE -// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 -// ENDIF -// -// becomes: -// -// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask -// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask -// S_CBRANCH_EXECZ label0 // This instruction is an -// // optimization which allows us to -// // branch if all the bits of -// // EXEC are zero. -// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch -// -// label0: -// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the -// // Then block. -// %EXEC = S_AND_B64 %SGPR0, %EXEC -// S_BRANCH_EXECZ label1 // Use our branch optimization -// // instruction again. -// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block -// label1: -// S_MOV_B64 // Restore the old EXEC value -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "SIInstrInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -class SILowerFlowControlPass : public MachineFunctionPass { - -private: - static char ID; - const TargetInstrInfo *TII; - std::vector<unsigned> PredicateStack; - std::vector<unsigned> UnusedRegisters; - - void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - -public: - SILowerFlowControlPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "SI Lower flow control instructions"; - } - -}; - -} // End anonymous namespace - -char SILowerFlowControlPass::ID = 0; - -FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) { - return new SILowerFlowControlPass(tm); -} - -bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) { - - // Find all the unused registers that can be used for the predicate stack. - for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(), - I = AMDGPU::SReg_64RegClass.end(); - I != S; --I) { - unsigned Reg = *I; - if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { - UnusedRegisters.push_back(Reg); - } - } - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I)) { - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - default: break; - case AMDGPU::SI_IF_NZ: - pushExecMask(MBB, I); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64), - AMDGPU::EXEC) - .addOperand(MI.getOperand(0)) // VCC - .addReg(AMDGPU::EXEC); - MI.eraseFromParent(); - break; - case AMDGPU::ELSE: - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64), - AMDGPU::EXEC) - .addReg(AMDGPU::EXEC); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64), - AMDGPU::EXEC) - .addReg(PredicateStack.back()) - .addReg(AMDGPU::EXEC); - MI.eraseFromParent(); - break; - case AMDGPU::ENDIF: - popExecMask(MBB, I); - MI.eraseFromParent(); - break; - } - } - } - return false; -} - -void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - - assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack"); - unsigned StackReg = UnusedRegisters.back(); - UnusedRegisters.pop_back(); - PredicateStack.push_back(StackReg); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64), - StackReg) - .addReg(AMDGPU::EXEC); -} - -void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - unsigned StackReg = PredicateStack.back(); - PredicateStack.pop_back(); - UnusedRegisters.push_back(StackReg); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64), - AMDGPU::EXEC) - .addReg(StackReg); -} diff --git a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp deleted file mode 100644 index 720245091f7..00000000000 --- a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp +++ /dev/null @@ -1,105 +0,0 @@ -//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This pass performs the following transformation on instructions with -// literal constants: -// -// %VGPR0 = V_MOV_IMM_I32 1 -// -// becomes: -// -// BUNDLE -// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT -// * SI_LOAD_LITERAL 1 -// -// The resulting sequence matches exactly how the hardware handles immediate -// operands, so this transformation greatly simplifies the code generator. -// -// Only the *_MOV_IMM_* support immediate operands at the moment, but when -// support for immediate operands is added to other instructions, they -// will be lowered here as well. -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineInstrBundle.h" - -using namespace llvm; - -namespace { - -class SILowerLiteralConstantsPass : public MachineFunctionPass { - -private: - static char ID; - const TargetInstrInfo *TII; - -public: - SILowerLiteralConstantsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "SI Lower literal constants pass"; - } -}; - -} // End anonymous namespace - -char SILowerLiteralConstantsPass::ID = 0; - -FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) { - return new SILowerLiteralConstantsPass(tm); -} - -bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I)) { - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - default: break; - case AMDGPU::S_MOV_IMM_I32: - case AMDGPU::S_MOV_IMM_I64: - case AMDGPU::V_MOV_IMM_F32: - case AMDGPU::V_MOV_IMM_I32: { - unsigned MovOpcode; - unsigned LoadLiteralOpcode; - MachineOperand LiteralOp = MI.getOperand(1); - if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) { - MovOpcode = AMDGPU::V_MOV_B32_e32; - } else { - MovOpcode = AMDGPU::S_MOV_B32; - } - if (LiteralOp.isImm()) { - LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32; - } else { - LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32; - } - MachineInstr *First = - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode), - MI.getOperand(0).getReg()) - .addReg(AMDGPU::SI_LITERAL_CONSTANT); - MachineInstr *Last = - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode)) - .addOperand(MI.getOperand(1)); - Last->setIsInsideBundle(); - llvm::finalizeBundle(MBB, First, Last); - MI.eraseFromParent(); - break; - } - } - } - } - return false; -} diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp deleted file mode 100644 index 3cc1cd67c47..00000000000 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - - -#include "SIMachineFunctionInfo.h" - -using namespace llvm; - -SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo(), - SPIPSInputAddr(0), - ShaderType(0) - { } diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h deleted file mode 100644 index 68097b44c52..00000000000 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h +++ /dev/null @@ -1,38 +0,0 @@ -//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config -// register, which is to tell the hardware which interpolation parameters to -// load. -// -//===----------------------------------------------------------------------===// - - -#ifndef _SIMACHINEFUNCTIONINFO_H_ -#define _SIMACHINEFUNCTIONINFO_H_ - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -class SIMachineFunctionInfo : public MachineFunctionInfo { - - private: - - public: - SIMachineFunctionInfo(const MachineFunction &MF); - unsigned SPIPSInputAddr; - unsigned ShaderType; - -}; - -} // End namespace llvm - - -#endif //_SIMACHINEFUNCTIONINFO_H_ diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp deleted file mode 100644 index 8fe8ae584e2..00000000000 --- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SI implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - - -#include "SIRegisterInfo.h" -#include "AMDGPUTargetMachine.h" - -using namespace llvm; - -SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm, - const TargetInstrInfo &tii) -: AMDGPURegisterInfo(tm, tii), - TM(tm), - TII(tii) - { } - -BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const -{ - BitVector Reserved(getNumRegs()); - return Reserved; -} - -unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const -{ - switch (reg) { - case AMDGPU::M0: return 124; - case AMDGPU::SREG_LIT_0: return 128; - default: return getHWRegNum(reg); - } -} - -const TargetRegisterClass * -SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const -{ - switch (rc->getID()) { - case AMDGPU::GPRF32RegClassID: - return &AMDGPU::VReg_32RegClass; - default: return rc; - } -} - -const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( - MVT VT) const -{ - switch(VT.SimpleTy) { - default: - case MVT::i32: return &AMDGPU::VReg_32RegClass; - } -} -#include "SIRegisterGetHWRegNum.inc" diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h deleted file mode 100644 index 99005cbccc1..00000000000 --- a/src/gallium/drivers/radeon/SIRegisterInfo.h +++ /dev/null @@ -1,54 +0,0 @@ -//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Interface definition for SIRegisterInfo -// -//===----------------------------------------------------------------------===// - - -#ifndef SIREGISTERINFO_H_ -#define SIREGISTERINFO_H_ - -#include "AMDGPURegisterInfo.h" - -namespace llvm { - -class AMDGPUTargetMachine; -class TargetInstrInfo; - -struct SIRegisterInfo : public AMDGPURegisterInfo -{ - AMDGPUTargetMachine &TM; - const TargetInstrInfo &TII; - - SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); - - virtual BitVector getReservedRegs(const MachineFunction &MF) const; - - /// getBinaryCode - Returns the hardware encoding for a register - virtual unsigned getBinaryCode(unsigned reg) const; - - /// getISARegClass - rc is an AMDIL reg class. This function returns the - /// SI register class that is equivalent to the given AMDIL register class. - virtual const TargetRegisterClass * - getISARegClass(const TargetRegisterClass * rc) const; - - /// getHWRegNum - Generated function that returns the hardware encoding for - /// a register - unsigned getHWRegNum(unsigned reg) const; - - /// getCFGStructurizerRegClass - get the register class of the specified - /// type to use in the CFGStructurizer - virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; - -}; - -} // End namespace llvm - -#endif // SIREGISTERINFO_H_ diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td deleted file mode 100644 index 28b65b82585..00000000000 --- a/src/gallium/drivers/radeon/SISchedule.td +++ /dev/null @@ -1,15 +0,0 @@ -//===-- SISchedule.td - SI Scheduling definitons -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: This is just a place holder for now. -// -//===----------------------------------------------------------------------===// - - -def SI_Itin : ProcessorItineraries <[], [], []>; diff --git a/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp b/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp deleted file mode 100644 index 380e7deb280..00000000000 --- a/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- TargetInfo/AMDGPUTargetInfo.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -/// The target for the AMDGPU backend -Target llvm::TheAMDGPUTarget; - -/// Extern function to initialize the targets for the AMDGPU backend -extern "C" void LLVMInitializeAMDGPUTargetInfo() { - RegisterTarget<Triple::r600, false> - R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX"); -} diff --git a/src/gallium/drivers/radeon/loader.cpp b/src/gallium/drivers/radeon/loader.cpp deleted file mode 100644 index 3ea8cd8900e..00000000000 --- a/src/gallium/drivers/radeon/loader.cpp +++ /dev/null @@ -1,35 +0,0 @@ - -#include "radeon_llvm_emit.h" - -#include <llvm/Support/CommandLine.h> -#include <llvm/Support/IRReader.h> -#include <llvm/Support/SourceMgr.h> -#include <llvm/LLVMContext.h> -#include <llvm/Module.h> -#include <stdio.h> - -#include <llvm-c/Core.h> - -using namespace llvm; - -static cl::opt<std::string> -InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); - -static cl::opt<std::string> -TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name")); - -int main(int argc, char ** argv) -{ - unsigned char * bytes; - unsigned byte_count; - - std::auto_ptr<Module> M; - LLVMContext &Context = getGlobalContext(); - SMDiagnostic Err; - cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n"); - M.reset(ParseIRFile(InputFilename, Err, Context)); - - Module * mod = M.get(); - - radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1); -} diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp index b889f029694..903e1028a09 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp @@ -39,12 +39,7 @@ #include <llvm/Target/TargetMachine.h> #include <llvm/Transforms/Scalar.h> #include <llvm-c/Target.h> - -#if HAVE_LLVM < 0x0302 -#include <llvm/Target/TargetData.h> -#else #include <llvm/DataLayout.h> -#endif #include <iostream> #include <stdlib.h> @@ -52,16 +47,6 @@ using namespace llvm; -#ifndef EXTERNAL_LLVM -extern "C" { - -void LLVMInitializeAMDGPUAsmPrinter(void); -void LLVMInitializeAMDGPUTargetMC(void); -void LLVMInitializeAMDGPUTarget(void); -void LLVMInitializeAMDGPUTargetInfo(void); -} -#endif - namespace { class LLVMEnsureMultithreaded { @@ -89,17 +74,10 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes, Triple AMDGPUTriple(sys::getDefaultTargetTriple()); -#if HAVE_LLVM == 0x0302 - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeAMDGPUAsmPrinter(); -#else LLVMInitializeR600TargetInfo(); LLVMInitializeR600Target(); LLVMInitializeR600TargetMC(); LLVMInitializeR600AsmPrinter(); -#endif std::string err; const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err); @@ -130,11 +108,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes, )); TargetMachine &AMDGPUTargetMachine = *tm.get(); PassManager PM; -#if HAVE_LLVM < 0x0302 - PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData())); -#else PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout())); -#endif PM.add(createPromoteMemoryToRegisterPass()); AMDGPUTargetMachine.setAsmVerbosityDefault(true); |