//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //==-----------------------------------------------------------------------===// // // This file implements the interfaces that AMDIL uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #include "AMDILISelLowering.h" #include "AMDILDevices.h" #include "AMDILIntrinsicInfo.h" #include "AMDILRegisterInfo.h" #include "AMDILSubtarget.h" #include "AMDILUtilityFunctions.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #define ISDBITCAST ISD::BITCAST #define MVTGLUE MVT::Glue //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// #include "AMDGPUGenCallingConv.inc" //===----------------------------------------------------------------------===// // TargetLowering Implementation Help Functions Begin //===----------------------------------------------------------------------===// static SDValue getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) { DebugLoc DL = Src.getDebugLoc(); EVT svt = Src.getValueType().getScalarType(); EVT dvt = Dst.getValueType().getScalarType(); if (svt.isFloatingPoint() && 
dvt.isFloatingPoint()) { if (dvt.bitsGT(svt)) { Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); } else if (svt.bitsLT(svt)) { Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, DAG.getConstant(1, MVT::i32)); } } else if (svt.isInteger() && dvt.isInteger()) { if (!svt.bitsEq(dvt)) { Src = DAG.getSExtOrTrunc(Src, DL, dvt); } } else if (svt.isInteger()) { unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; if (!svt.bitsEq(dvt)) { if (dvt.getSimpleVT().SimpleTy == MVT::f32) { Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); } else { assert(0 && "We only support 32 and 64bit fp types"); } } Src = DAG.getNode(opcode, DL, dvt, Src); } else if (dvt.isInteger()) { unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; if (svt.getSimpleVT().SimpleTy == MVT::f32) { Src = DAG.getNode(opcode, DL, MVT::i32, Src); } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { Src = DAG.getNode(opcode, DL, MVT::i64, Src); } else { assert(0 && "We only support 32 and 64bit fp types"); } Src = DAG.getSExtOrTrunc(Src, DL, dvt); } return Src; } // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC // condition. 
static AMDILCC::CondCodes CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) { switch (CC) { default: { errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; assert(0 && "Unknown condition code!"); } case ISD::SETO: switch(type) { case MVT::f32: return AMDILCC::IL_CC_F_O; case MVT::f64: return AMDILCC::IL_CC_D_O; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETUO: switch(type) { case MVT::f32: return AMDILCC::IL_CC_F_UO; case MVT::f64: return AMDILCC::IL_CC_D_UO; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETGT: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_I_GT; case MVT::f32: return AMDILCC::IL_CC_F_GT; case MVT::f64: return AMDILCC::IL_CC_D_GT; case MVT::i64: return AMDILCC::IL_CC_L_GT; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETGE: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_I_GE; case MVT::f32: return AMDILCC::IL_CC_F_GE; case MVT::f64: return AMDILCC::IL_CC_D_GE; case MVT::i64: return AMDILCC::IL_CC_L_GE; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETLT: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_I_LT; case MVT::f32: return AMDILCC::IL_CC_F_LT; case MVT::f64: return AMDILCC::IL_CC_D_LT; case MVT::i64: return AMDILCC::IL_CC_L_LT; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETLE: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_I_LE; case MVT::f32: return AMDILCC::IL_CC_F_LE; case MVT::f64: return AMDILCC::IL_CC_D_LE; case MVT::i64: return AMDILCC::IL_CC_L_LE; default: assert(0 && "Opcode combination not generated 
correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETNE: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_I_NE; case MVT::f32: return AMDILCC::IL_CC_F_NE; case MVT::f64: return AMDILCC::IL_CC_D_NE; case MVT::i64: return AMDILCC::IL_CC_L_NE; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETEQ: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_I_EQ; case MVT::f32: return AMDILCC::IL_CC_F_EQ; case MVT::f64: return AMDILCC::IL_CC_D_EQ; case MVT::i64: return AMDILCC::IL_CC_L_EQ; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETUGT: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_U_GT; case MVT::f32: return AMDILCC::IL_CC_F_UGT; case MVT::f64: return AMDILCC::IL_CC_D_UGT; case MVT::i64: return AMDILCC::IL_CC_UL_GT; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETUGE: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_U_GE; case MVT::f32: return AMDILCC::IL_CC_F_UGE; case MVT::f64: return AMDILCC::IL_CC_D_UGE; case MVT::i64: return AMDILCC::IL_CC_UL_GE; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETULT: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_U_LT; case MVT::f32: return AMDILCC::IL_CC_F_ULT; case MVT::f64: return AMDILCC::IL_CC_D_ULT; case MVT::i64: return AMDILCC::IL_CC_UL_LT; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETULE: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_U_LE; case MVT::f32: return AMDILCC::IL_CC_F_ULE; case MVT::f64: return 
AMDILCC::IL_CC_D_ULE; case MVT::i64: return AMDILCC::IL_CC_UL_LE; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETUNE: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_U_NE; case MVT::f32: return AMDILCC::IL_CC_F_UNE; case MVT::f64: return AMDILCC::IL_CC_D_UNE; case MVT::i64: return AMDILCC::IL_CC_UL_NE; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETUEQ: switch (type) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: return AMDILCC::IL_CC_U_EQ; case MVT::f32: return AMDILCC::IL_CC_F_UEQ; case MVT::f64: return AMDILCC::IL_CC_D_UEQ; case MVT::i64: return AMDILCC::IL_CC_UL_EQ; default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETOGT: switch (type) { case MVT::f32: return AMDILCC::IL_CC_F_OGT; case MVT::f64: return AMDILCC::IL_CC_D_OGT; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETOGE: switch (type) { case MVT::f32: return AMDILCC::IL_CC_F_OGE; case MVT::f64: return AMDILCC::IL_CC_D_OGE; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETOLT: switch (type) { case MVT::f32: return AMDILCC::IL_CC_F_OLT; case MVT::f64: return AMDILCC::IL_CC_D_OLT; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETOLE: switch (type) { case MVT::f32: return AMDILCC::IL_CC_F_OLE; case MVT::f64: return AMDILCC::IL_CC_D_OLE; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: default: assert(0 && "Opcode combination not 
generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETONE: switch (type) { case MVT::f32: return AMDILCC::IL_CC_F_ONE; case MVT::f64: return AMDILCC::IL_CC_D_ONE; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; case ISD::SETOEQ: switch (type) { case MVT::f32: return AMDILCC::IL_CC_F_OEQ; case MVT::f64: return AMDILCC::IL_CC_D_OEQ; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: default: assert(0 && "Opcode combination not generated correctly!"); return AMDILCC::COND_ERROR; }; }; } SDValue AMDILTargetLowering::LowerMemArgument( SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && getTargetMachine().Options.GuaranteedTailCallOpt; bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); // FIXME: For now, all byval parameter objects are marked mutable. This can // be changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. 
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) return FIN; return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); } //===----------------------------------------------------------------------===// // TargetLowering Implementation Help Functions End //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // TargetLowering Class Implementation Begins //===----------------------------------------------------------------------===// AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { int types[] = { (int)MVT::i8, (int)MVT::i16, (int)MVT::i32, (int)MVT::f32, (int)MVT::f64, (int)MVT::i64, (int)MVT::v2i8, (int)MVT::v4i8, (int)MVT::v2i16, (int)MVT::v4i16, (int)MVT::v4f32, (int)MVT::v4i32, (int)MVT::v2f32, (int)MVT::v2i32, (int)MVT::v2f64, (int)MVT::v2i64 }; int IntTypes[] = { (int)MVT::i8, (int)MVT::i16, (int)MVT::i32, (int)MVT::i64 }; int FloatTypes[] = { (int)MVT::f32, (int)MVT::f64 }; int VectorTypes[] = { (int)MVT::v2i8, (int)MVT::v4i8, (int)MVT::v2i16, (int)MVT::v4i16, (int)MVT::v4f32, (int)MVT::v4i32, (int)MVT::v2f32, (int)MVT::v2i32, (int)MVT::v2f64, (int)MVT::v2i64 }; size_t numTypes = sizeof(types) / sizeof(*types); size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); const AMDILSubtarget &STM = getTargetMachine().getSubtarget(); // These are the current register classes that are // supported for (unsigned int x = 0; x < numTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types // We cannot 
sextinreg, expand to shifts setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); setOperationAction(ISD::SUBE, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); setOperationAction(ISD::ADDC, VT, Expand); setOperationAction(ISD::BRCOND, VT, Custom); setOperationAction(ISD::BR_JT, VT, Expand); setOperationAction(ISD::BRIND, VT, Expand); // TODO: Implement custom UREM/SREM routines setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::GlobalAddress, VT, Custom); setOperationAction(ISD::JumpTable, VT, Custom); setOperationAction(ISD::ConstantPool, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); if (VT != MVT::i64 && VT != MVT::v2i64) { setOperationAction(ISD::SDIV, VT, Custom); } } for (unsigned int x = 0; x < numFloatTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; // IL does not have these operations for floating point types setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); setOperationAction(ISD::SETOLT, VT, Expand); setOperationAction(ISD::SETOGE, VT, Expand); setOperationAction(ISD::SETOGT, VT, Expand); setOperationAction(ISD::SETOLE, VT, Expand); setOperationAction(ISD::SETULT, VT, Expand); setOperationAction(ISD::SETUGE, VT, Expand); setOperationAction(ISD::SETUGT, VT, Expand); setOperationAction(ISD::SETULE, VT, Expand); } for (unsigned int x = 0; x < numIntTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; // GPU also does not have divrem function for signed or unsigned setOperationAction(ISD::SDIVREM, VT, Expand); // GPU does not have [S|U]MUL_LOHI functions as a single instruction setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); // GPU doesn't have a rotl, rotr, or byteswap instruction setOperationAction(ISD::ROTR, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); // GPU doesn't have 
any counting operators setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); } for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) { MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); // setOperationAction(ISD::VSETCC, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); } if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) { setOperationAction(ISD::MULHU, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::v2i64, Expand); setOperationAction(ISD::MULHS, MVT::i64, Expand); setOperationAction(ISD::MULHS, MVT::v2i64, Expand); setOperationAction(ISD::ADD, MVT::v2i64, Expand); setOperationAction(ISD::SREM, MVT::v2i64, Expand); setOperationAction(ISD::Constant , MVT::i64 , Legal); setOperationAction(ISD::SDIV, MVT::v2i64, Expand); setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); } if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) { // we support loading/storing v2f64 but not operations on the type setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); // We want to expand vector conversions into their scalar // counterparts. 
setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); } // TODO: Fix the UDIV24 algorithm so it works for these // types correctly. This needs vector comparisons // for this to work correctly. setOperationAction(ISD::UDIV, MVT::v2i8, Expand); setOperationAction(ISD::UDIV, MVT::v4i8, Expand); setOperationAction(ISD::UDIV, MVT::v2i16, Expand); setOperationAction(ISD::UDIV, MVT::v4i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); setOperationAction(ISD::SUBC, MVT::Other, Expand); setOperationAction(ISD::ADDE, MVT::Other, Expand); setOperationAction(ISD::ADDC, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); // Use the default implementation. 
setOperationAction(ISD::VAARG , MVT::Other, Expand); setOperationAction(ISD::VACOPY , MVT::Other, Expand); setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); setOperationAction(ISD::Constant , MVT::i32 , Legal); setOperationAction(ISD::TRAP , MVT::Other , Legal); setStackPointerRegisterToSaveRestore(AMDGPU::SP); setSchedulingPreference(Sched::RegPressure); setPow2DivIsCheap(false); setPrefLoopAlignment(16); setSelectIsExpensive(true); setJumpIsExpensive(true); maxStoresPerMemcpy = 4096; maxStoresPerMemmove = 4096; maxStoresPerMemset = 4096; #undef numTypes #undef numIntTypes #undef numVectorTypes #undef numFloatTypes } const char * AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; case AMDILISD::MAD: return "AMDILISD::MAD"; case AMDILISD::CALL: return "AMDILISD::CALL"; case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC"; case AMDILISD::UMUL: return "AMDILISD::UMUL"; case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; case AMDILISD::CMP: return "AMDILISD::CMP"; case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; }; } bool AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { return false; } // The backend supports 32 and 64 
bit floating point immediates bool AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { return true; } else { return false; } } bool AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const { if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { return false; } else { return true; } } // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to // be zero. Op is expected to be a target specific node. Used by DAG // combiner. void AMDILTargetLowering::computeMaskedBitsForTargetNode( const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { APInt KnownZero2; APInt KnownOne2; KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything switch (Op.getOpcode()) { default: break; case AMDILISD::SELECT_CC: DAG.ComputeMaskedBits( Op.getOperand(1), KnownZero, KnownOne, Depth + 1 ); DAG.ComputeMaskedBits( Op.getOperand(0), KnownZero2, KnownOne2 ); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; }; } // This is the function that determines which calling convention should // be used. Currently there is only one calling convention CCAssignFn* AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const { //uint64_t CC = cast(Op.getOperand(1))->getZExtValue(); return CC_AMDIL32; } // LowerCallResult - Lower the result values of an ISD::CALL into the // appropriate copies out of appropriate physical registers. This assumes that // Chain/InFlag are the input chain/flag to use, and that TheCall is the call // being lowered. The returns a SDNode with the same number of values as the // ISD::CALL. 
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  // Locations that are not register locations are skipped.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    const CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      continue;

    // getCopyFromReg yields (value, chain, glue); keep threading the chain
    // and the glue through every copy.
    Chain = DAG.getCopyFromReg(
        Chain,
        dl,
        VA.getLocReg(),
        VA.getValVT(),
        InFlag
        ).getValue(1);
    InVals.push_back(Chain.getValue(0));
    InFlag = Chain.getValue(2);
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
static const SDValue Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) { if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { DAG.AssignOrdering( New.getNode(), order ); for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) Ordered( DAG, order, New.getOperand(i) ); } return New; } #define LOWER(A) \ case ISD:: A: \ return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) SDValue AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: Op.getNode()->dump(); assert(0 && "Custom lowering code for this" "instruction is not implemented yet!"); break; LOWER(GlobalAddress); LOWER(JumpTable); LOWER(ConstantPool); LOWER(ExternalSymbol); LOWER(SDIV); LOWER(SREM); LOWER(BUILD_VECTOR); LOWER(SELECT); LOWER(SIGN_EXTEND_INREG); LOWER(DYNAMIC_STACKALLOC); LOWER(BRCOND); } return Op; } #undef LOWER SDValue AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDValue DST = Op; const GlobalAddressSDNode *GADN = cast(Op); const GlobalValue *G = GADN->getGlobal(); DebugLoc DL = Op.getDebugLoc(); const GlobalVariable *GV = dyn_cast(G); if (!GV) { DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); } else { if (GV->hasInitializer()) { const Constant *C = dyn_cast(GV->getInitializer()); if (const ConstantInt *CI = dyn_cast(C)) { DST = DAG.getConstant(CI->getValue(), Op.getValueType()); } else if (const ConstantFP *CF = dyn_cast(C)) { DST = DAG.getConstantFP(CF->getValueAPF(), Op.getValueType()); } else if (dyn_cast(C)) { EVT VT = Op.getValueType(); if (VT.isInteger()) { DST = DAG.getConstant(0, VT); } else { DST = DAG.getConstantFP(0, VT); } } else { assert(!"lowering this type of Global Address " "not implemented yet!"); C->dump(); DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); } } else { DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); } } return DST; } SDValue AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = 
cast(Op); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); return Result; } SDValue AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); EVT PtrVT = Op.getValueType(); SDValue Result; if (CP->isMachineConstantPoolEntry()) { Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); } else { Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); } return Result; } SDValue AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { const char *Sym = cast(Op)->getSymbol(); SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); return Result; } /// LowerFORMAL_ARGUMENTS - transform physical registers into /// virtual registers and generate load operations for /// arguments places on the stack. /// TODO: isVarArg, hasStructRet, isMemReg SDValue AMDILTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); //const Function *Fn = MF.getFunction(); //MachineRegisterInfo &RegInfo = MF.getRegInfo(); SmallVector ArgLocs; CallingConv::ID CC = MF.getFunction()->getCallingConv(); //bool hasStructRet = MF.getFunction()->hasStructRetAttr(); CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); // When more calling conventions are added, they need to be chosen here CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32); SDValue StackPtr; //unsigned int FirstStackArgLoc = 0; for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); const TargetRegisterClass *RC = getRegClassFor( RegVT.getSimpleVT().SimpleTy); 
unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg( Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed // promoted to 32 bits. Insert an assert[sz]ext to capture // this, then truncate to the right size. if (VA.getLocInfo() == CCValAssign::SExt) { ArgValue = DAG.getNode( ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); } else if (VA.getLocInfo() == CCValAssign::ZExt) { ArgValue = DAG.getNode( ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); } if (VA.getLocInfo() != CCValAssign::Full) { ArgValue = DAG.getNode( ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } // Add the value to the list of arguments // to be passed in registers InVals.push_back(ArgValue); if (isVarArg) { assert(0 && "Variable arguments are not yet supported"); // See MipsISelLowering.cpp for ideas on how to implement } } else if(VA.isMemLoc()) { InVals.push_back(LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i)); } else { assert(0 && "found a Value Assign that is " "neither a register or a memory location"); } } /*if (hasStructRet) { assert(0 && "Has struct return is not yet implemented"); // See MipsISelLowering.cpp for ideas on how to implement }*/ if (isVarArg) { assert(0 && "Variable arguments are not yet supported"); // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement } // This needs to be changed to non-zero if the return function needs // to pop bytes return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval /// function parameter. 
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { assert(0 && "MemCopy does not exist yet"); SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, Src.getDebugLoc(), Dst, Src, SizeNode, Flags.getByValAlign(), /*IsVol=*/false, /*AlwaysInline=*/true, MachinePointerInfo(), MachinePointerInfo()); } SDValue AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned int LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); if (Flags.isByVal()) { PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG); } else { PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); } return PtrOff; } /// LowerCAL - functions arguments are copied from virtual /// regs to (physical regs)/(stack frame), CALLSEQ_START and /// CALLSEQ_END are emitted. /// TODO: isVarArg, isTailCall, hasStructRet SDValue AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, bool& isTailCall, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { isTailCall = false; MachineFunction& MF = DAG.getMachineFunction(); // FIXME: DO we need to handle fast calling conventions and tail call // optimizations?? X86/PPC ISelLowering /*bool hasStructRet = (TheCall->getNumArgs()) ? 
TheCall->getArgFlags(0).device()->isSRet() : false;*/ MachineFrameInfo *MFI = MF.getFrameInfo(); // Analyze operands of the call, assigning locations to each operand SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); // Analyize the calling operands, but need to change // if we have more than one calling convetion CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); unsigned int NumBytes = CCInfo.getNextStackOffset(); if (isTailCall) { assert(isTailCall && "Tail Call not handled yet!"); // See X86/PPC ISelLowering } Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SmallVector, 8> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; //unsigned int FirstStacArgLoc = 0; //int LastArgStackLoc = 0; // Walk the register/memloc assignments, insert copies/loads for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers // Arguments start after the 5 first operands of ISD::CALL SDValue Arg = OutVals[i]; //Promote the value if needed switch(VA.getLocInfo()) { default: assert(0 && "Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (VA.isMemLoc()) { // Create the frame index object for this incoming parameter int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, VA.getLocMemOffset(), true); SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 
MachinePointerInfo::getFixedStack(FI), false, false, 0)); } else { assert(0 && "Not a Reg/Mem Loc, major error!"); } } if (!MemOpChains.empty()) { Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); } SDValue InFlag; if (!isTailCall) { for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, // every direct call is) turn it into a TargetGlobalAddress/ // TargetExternalSymbol // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); } else if (isTailCall) { assert(0 && "Tail calls are not handled yet"); // see X86 ISelLowering for ideas on implementation: 1708 } SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE); SmallVector Ops; if (isTailCall) { assert(0 && "Tail calls are not handled yet"); // see X86 ISelLowering for ideas on implementation: 1721 } // If this is a direct call, pass the chain and the callee if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); } if (isTailCall) { assert(0 && "Tail calls are not handled yet"); // see X86 ISelLowering for ideas on implementation: 1739 } // Add argument registers to the end of the list so that they are known // live into the call for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { Ops.push_back(DAG.getRegister( RegsToPass[i].first, RegsToPass[i].second.getValueType())); } if (InFlag.getNode()) { Ops.push_back(InFlag); } // Emit Tail Call if (isTailCall) { assert(0 && "Tail calls are not handled yet"); // see X86 ISelLowering for ideas on implementation: 1762 } Chain = DAG.getNode(AMDILISD::CALL, dl, NodeTys, &Ops[0], 
Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node Chain = DAG.getCALLSEQ_END( Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that // we return return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { EVT OVT = Op.getValueType(); SDValue DST; if (OVT.getScalarType() == MVT::i64) { DST = LowerSDIV64(Op, DAG); } else if (OVT.getScalarType() == MVT::i32) { DST = LowerSDIV32(Op, DAG); } else if (OVT.getScalarType() == MVT::i16 || OVT.getScalarType() == MVT::i8) { DST = LowerSDIV24(Op, DAG); } else { DST = SDValue(Op.getNode(), 0); } return DST; } SDValue AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { EVT OVT = Op.getValueType(); SDValue DST; if (OVT.getScalarType() == MVT::i64) { DST = LowerSREM64(Op, DAG); } else if (OVT.getScalarType() == MVT::i32) { DST = LowerSREM32(Op, DAG); } else if (OVT.getScalarType() == MVT::i16) { DST = LowerSREM16(Op, DAG); } else if (OVT.getScalarType() == MVT::i8) { DST = LowerSREM8(Op, DAG); } else { DST = SDValue(Op.getNode(), 0); } return DST; } SDValue AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const { EVT VT = Op.getValueType(); SDValue Nodes1; SDValue second; SDValue third; SDValue fourth; DebugLoc DL = Op.getDebugLoc(); Nodes1 = DAG.getNode(AMDILISD::VBUILD, DL, VT, Op.getOperand(0)); #if 0 bool allEqual = true; for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) { if (Op.getOperand(0) != Op.getOperand(x)) { allEqual = false; break; } } if (allEqual) { return Nodes1; } #endif switch(Op.getNumOperands()) { default: case 1: break; case 4: fourth = Op.getOperand(3); if (fourth.getOpcode() != ISD::UNDEF) { Nodes1 = DAG.getNode( ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), Nodes1, fourth, DAG.getConstant(7, MVT::i32)); } 
case 3: third = Op.getOperand(2); if (third.getOpcode() != ISD::UNDEF) { Nodes1 = DAG.getNode( ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), Nodes1, third, DAG.getConstant(6, MVT::i32)); } case 2: second = Op.getOperand(1); if (second.getOpcode() != ISD::UNDEF) { Nodes1 = DAG.getNode( ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), Nodes1, second, DAG.getConstant(5, MVT::i32)); } break; }; return Nodes1; } SDValue AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); DebugLoc DL = Op.getDebugLoc(); Cond = getConversionNode(DAG, Cond, Op, true); Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, Op.getValueType(), Cond, LHS, RHS); return Cond; } SDValue AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDValue Data = Op.getOperand(0); VTSDNode *BaseType = cast(Op.getOperand(1)); DebugLoc DL = Op.getDebugLoc(); EVT DVT = Data.getValueType(); EVT BVT = BaseType->getVT(); unsigned baseBits = BVT.getScalarType().getSizeInBits(); unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; unsigned shiftBits = srcBits - baseBits; if (srcBits < 32) { // If the op is less than 32 bits, then it needs to extend to 32bits // so it can properly keep the upper bits valid. EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); shiftBits = 32 - baseBits; DVT = IVT; } SDValue Shift = DAG.getConstant(shiftBits, DVT); // Shift left by 'Shift' bits. Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); // Signed shift Right by 'Shift' bits. Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); if (srcBits < 32) { // Once the sign extension is done, the op needs to be converted to // its original type. 
Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); } return Data; } EVT AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { int iSize = (size * numEle); int vEle = (iSize >> ((size == 64) ? 6 : 5)); if (!vEle) { vEle = 1; } if (size == 64) { if (vEle == 1) { return EVT(MVT::i64); } else { return EVT(MVT::getVectorVT(MVT::i64, vEle)); } } else { if (vEle == 1) { return EVT(MVT::i32); } else { return EVT(MVT::getVectorVT(MVT::i32, vEle)); } } } SDValue AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); unsigned int SPReg = AMDGPU::SP; DebugLoc DL = Op.getDebugLoc(); SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32); SDValue NewSP = DAG.getNode(ISD::ADD, DL, MVT::i32, SP, Size); Chain = DAG.getCopyToReg(SP.getValue(1), DL, SPReg, NewSP); SDValue Ops[2] = {NewSP, Chain}; Chain = DAG.getMergeValues(Ops, 2 ,DL); return Chain; } SDValue AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); SDValue Jump = Op.getOperand(2); SDValue Result; Result = DAG.getNode( AMDILISD::BRANCH_COND, Op.getDebugLoc(), Op.getValueType(), Chain, Jump, Cond); return Result; } // LowerRET - Lower an ISD::RET node. 
SDValue AMDILTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { //MachineFunction& MF = DAG.getMachineFunction(); // CCValAssign - represent the assignment of the return value // to a location SmallVector RVLocs; // CCState - Info about the registers and stack slot CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), RVLocs, *DAG.getContext()); // Analyze return values of ISD::RET CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); // If this is the first return lowered for this function, add // the regs to the liveout set for the function MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { MRI.addLiveOut(RVLocs[i].getLocReg()); } } // FIXME: implement this when tail call is implemented // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); // both x86 and ppc implement this in ISelLowering // Regular return here SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; SDValue ValToCopy = OutVals[i]; assert(VA.isRegLoc() && "Can only return in registers!"); // ISD::Ret => ret chain, (regnum1, val1), ... 
// So i * 2 + 1 index only the regnums Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); // guarantee that all emitted copies are stuck together // avoiding something bad Flag = Chain.getValue(1); } /*if (MF.getFunction()->hasStructRetAttr()) { assert(0 && "Struct returns are not yet implemented!"); // Both MIPS and X86 have this }*/ RetOps[0] = Chain; if (Flag.getNode()) RetOps.push_back(Flag); Flag = DAG.getNode(AMDILISD::RET_FLAG, dl, MVT::Other, &RetOps[0], RetOps.size()); return Flag; } unsigned int AMDILTargetLowering::getFunctionAlignment(const Function *) const { return 0; } SDValue AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT OVT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); MVT INTTY; MVT FLTTY; if (!OVT.isVector()) { INTTY = MVT::i32; FLTTY = MVT::f32; } else if (OVT.getVectorNumElements() == 2) { INTTY = MVT::v2i32; FLTTY = MVT::v2f32; } else if (OVT.getVectorNumElements() == 4) { INTTY = MVT::v4i32; FLTTY = MVT::v4f32; } unsigned bitsize = OVT.getScalarType().getSizeInBits(); // char|short jq = ia ^ ib; SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); // jq = jq >> (bitsize - 2) jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); // jq = jq | 0x1 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); // jq = (int)jq jq = DAG.getSExtOrTrunc(jq, DL, INTTY); // int ia = (int)LHS; SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); // int ib, (int)RHS; SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); // float fa = (float)ia; SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); // float fb = (float)ib; SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); // float fq = native_divide(fa, fb); SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); // fq = trunc(fq); fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); // float fqneg = -fq; SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, 
fq); // float fr = mad(fqneg, fb, fa); SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); // int iq = (int)fq; SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); // fr = fabs(fr); fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); // fb = fabs(fb); fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); // int cv = fr >= fb; SDValue cv; if (INTTY == MVT::i32) { cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); } else { cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); } // jq = (cv ? jq : 0); jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, DAG.getConstant(0, OVT)); // dst = iq + jq; iq = DAG.getSExtOrTrunc(iq, DL, OVT); iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); return iq; } SDValue AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT OVT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); // The LowerSDIV32 function generates equivalent to the following IL. // mov r0, LHS // mov r1, RHS // ilt r10, r0, 0 // ilt r11, r1, 0 // iadd r0, r0, r10 // iadd r1, r1, r11 // ixor r0, r0, r10 // ixor r1, r1, r11 // udiv r0, r0, r1 // ixor r10, r10, r11 // iadd r0, r0, r10 // ixor DST, r0, r10 // mov r0, LHS SDValue r0 = LHS; // mov r1, RHS SDValue r1 = RHS; // ilt r10, r0, 0 SDValue r10 = DAG.getSelectCC(DL, r0, DAG.getConstant(0, OVT), DAG.getConstant(-1, MVT::i32), DAG.getConstant(0, MVT::i32), ISD::SETLT); // ilt r11, r1, 0 SDValue r11 = DAG.getSelectCC(DL, r1, DAG.getConstant(0, OVT), DAG.getConstant(-1, MVT::i32), DAG.getConstant(0, MVT::i32), ISD::SETLT); // iadd r0, r0, r10 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); // iadd r1, r1, r11 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); // ixor r0, r0, r10 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); // ixor r1, r1, r11 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); // udiv r0, r0, r1 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); // ixor r10, r10, r11 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); // iadd r0, r0, r10 r0 = 
DAG.getNode(ISD::ADD, DL, OVT, r0, r10); // ixor DST, r0, r10 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); return DST; } SDValue AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { return SDValue(Op.getNode(), 0); } SDValue AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT OVT = Op.getValueType(); MVT INTTY = MVT::i32; if (OVT == MVT::v2i8) { INTTY = MVT::v2i32; } else if (OVT == MVT::v4i8) { INTTY = MVT::v4i32; } SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); return LHS; } SDValue AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT OVT = Op.getValueType(); MVT INTTY = MVT::i32; if (OVT == MVT::v2i16) { INTTY = MVT::v2i32; } else if (OVT == MVT::v4i16) { INTTY = MVT::v4i32; } SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); return LHS; } SDValue AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT OVT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); // The LowerSREM32 function generates equivalent to the following IL. 
// mov r0, LHS // mov r1, RHS // ilt r10, r0, 0 // ilt r11, r1, 0 // iadd r0, r0, r10 // iadd r1, r1, r11 // ixor r0, r0, r10 // ixor r1, r1, r11 // udiv r20, r0, r1 // umul r20, r20, r1 // sub r0, r0, r20 // iadd r0, r0, r10 // ixor DST, r0, r10 // mov r0, LHS SDValue r0 = LHS; // mov r1, RHS SDValue r1 = RHS; // ilt r10, r0, 0 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), r0, DAG.getConstant(0, OVT)); // ilt r11, r1, 0 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), r1, DAG.getConstant(0, OVT)); // iadd r0, r0, r10 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); // iadd r1, r1, r11 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); // ixor r0, r0, r10 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); // ixor r1, r1, r11 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); // udiv r20, r0, r1 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); // umul r20, r20, r1 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); // sub r0, r0, r20 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); // iadd r0, r0, r10 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); // ixor DST, r0, r10 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); return DST; } SDValue AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { return SDValue(Op.getNode(), 0); }