aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon/AMDILISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon/AMDILISelLowering.cpp')
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp5612
1 files changed, 5612 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
new file mode 100644
index 00000000000..6f78d15ad0b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -0,0 +1,5612 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+#define ISDBITCAST ISD::BITCAST
+#define MVTGLUE MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDILGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+ static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+ DebugLoc DL = Src.getDebugLoc();
+ EVT svt = Src.getValueType().getScalarType();
+ EVT dvt = Dst.getValueType().getScalarType();
+ if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+ if (dvt.bitsGT(svt)) {
+ Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+ } else if (svt.bitsLT(svt)) {
+ Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else if (svt.isInteger() && dvt.isInteger()) {
+ if (!svt.bitsEq(dvt)) {
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ } else {
+ Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
+ }
+ } else if (svt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+ if (!svt.bitsEq(dvt)) {
+ if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+ } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ }
+ Src = DAG.getNode(opcode, DL, dvt, Src);
+ } else if (dvt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+ if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+ } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ }
+ return Src;
+}
+// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
+// condition.
+ static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+ switch (CC) {
+ default:
+ {
+ errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+ assert(0 && "Unknown condition code!");
+ }
+ case ISD::SETO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_O;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_O;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UO;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UO;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_NE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_NE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_EQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_EQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UNE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UNE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UEQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETONE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ONE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ONE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOEQ:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OEQ;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ };
+}
+
+ static unsigned int
+translateToOpcode(uint64_t CCCode, unsigned int regClass)
+{
+ switch (CCCode) {
+ case AMDILCC::IL_CC_D_EQ:
+ case AMDILCC::IL_CC_D_OEQ:
+ if (regClass == AMDIL::GPRV2F64RegClassID) {
+ return (unsigned int)AMDIL::DEQ_v2f64;
+ } else {
+ return (unsigned int)AMDIL::DEQ;
+ }
+ case AMDILCC::IL_CC_D_LE:
+ case AMDILCC::IL_CC_D_OLE:
+ case AMDILCC::IL_CC_D_ULE:
+ case AMDILCC::IL_CC_D_GE:
+ case AMDILCC::IL_CC_D_OGE:
+ case AMDILCC::IL_CC_D_UGE:
+ return (unsigned int)AMDIL::DGE;
+ case AMDILCC::IL_CC_D_LT:
+ case AMDILCC::IL_CC_D_OLT:
+ case AMDILCC::IL_CC_D_ULT:
+ case AMDILCC::IL_CC_D_GT:
+ case AMDILCC::IL_CC_D_OGT:
+ case AMDILCC::IL_CC_D_UGT:
+ return (unsigned int)AMDIL::DLT;
+ case AMDILCC::IL_CC_D_NE:
+ case AMDILCC::IL_CC_D_UNE:
+ return (unsigned int)AMDIL::DNE;
+ case AMDILCC::IL_CC_F_EQ:
+ case AMDILCC::IL_CC_F_OEQ:
+ return (unsigned int)AMDIL::FEQ;
+ case AMDILCC::IL_CC_F_LE:
+ case AMDILCC::IL_CC_F_ULE:
+ case AMDILCC::IL_CC_F_OLE:
+ case AMDILCC::IL_CC_F_GE:
+ case AMDILCC::IL_CC_F_UGE:
+ case AMDILCC::IL_CC_F_OGE:
+ return (unsigned int)AMDIL::FGE;
+ case AMDILCC::IL_CC_F_LT:
+ case AMDILCC::IL_CC_F_OLT:
+ case AMDILCC::IL_CC_F_ULT:
+ case AMDILCC::IL_CC_F_GT:
+ case AMDILCC::IL_CC_F_OGT:
+ case AMDILCC::IL_CC_F_UGT:
+ if (regClass == AMDIL::GPRV2F32RegClassID) {
+ return (unsigned int)AMDIL::FLT_v2f32;
+ } else if (regClass == AMDIL::GPRV4F32RegClassID) {
+ return (unsigned int)AMDIL::FLT_v4f32;
+ } else {
+ return (unsigned int)AMDIL::FLT;
+ }
+ case AMDILCC::IL_CC_F_NE:
+ case AMDILCC::IL_CC_F_UNE:
+ return (unsigned int)AMDIL::FNE;
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_U_EQ:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IEQ;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRV2I8RegClassID
+ || regClass == AMDIL::GPRV2I16RegClassID) {
+ return (unsigned int)AMDIL::IEQ_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRV4I8RegClassID
+ || regClass == AMDIL::GPRV4I16RegClassID) {
+ return (unsigned int)AMDIL::IEQ_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_L_EQ:
+ case AMDILCC::IL_CC_UL_EQ:
+ return (unsigned int)AMDIL::LEQ;
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LE:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IGE;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IGE_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IGE_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_I_LT:
+ case AMDILCC::IL_CC_I_GT:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ILT;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ILT_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ILT_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_L_GE:
+ return (unsigned int)AMDIL::LGE;
+ case AMDILCC::IL_CC_L_LE:
+ return (unsigned int)AMDIL::LLE;
+ case AMDILCC::IL_CC_L_LT:
+ return (unsigned int)AMDIL::LLT;
+ case AMDILCC::IL_CC_L_GT:
+ return (unsigned int)AMDIL::LGT;
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_U_NE:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::INE;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::INE_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::INE_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LE:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGE;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGE_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGE_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_L_NE:
+ case AMDILCC::IL_CC_UL_NE:
+ return (unsigned int)AMDIL::LNE;
+ case AMDILCC::IL_CC_UL_GE:
+ return (unsigned int)AMDIL::ULGE;
+ case AMDILCC::IL_CC_UL_LE:
+ return (unsigned int)AMDIL::ULLE;
+ case AMDILCC::IL_CC_U_LT:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ULT;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ULT_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ULT_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_U_GT:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGT;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGT_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGT_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_UL_LT:
+ return (unsigned int)AMDIL::ULLT;
+ case AMDILCC::IL_CC_UL_GT:
+ return (unsigned int)AMDIL::ULGT;
+ case AMDILCC::IL_CC_F_UEQ:
+ case AMDILCC::IL_CC_D_UEQ:
+ case AMDILCC::IL_CC_F_ONE:
+ case AMDILCC::IL_CC_D_ONE:
+ case AMDILCC::IL_CC_F_O:
+ case AMDILCC::IL_CC_F_UO:
+ case AMDILCC::IL_CC_D_O:
+ case AMDILCC::IL_CC_D_UO:
+ // we don't care
+ return 0;
+
+ }
+ errs()<<"Opcode: "<<CCCode<<"\n";
+ assert(0 && "Unknown opcode retrieved");
+ return 0;
+}
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const
+{
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
+ getTargetMachine().Options.GuaranteedTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can
+ // be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable. Since they
+ // could be overwritten by lowering of arguments in case of a tail call.
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal())
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction generation functions
+//===----------------------------------------------------------------------===//
+uint32_t
+AMDILTargetLowering::addExtensionInstructions(
+ uint32_t reg, bool signedShift,
+ unsigned int simpleVT) const
+{
+ int shiftSize = 0;
+ uint32_t LShift, RShift;
+ switch(simpleVT)
+ {
+ default:
+ return reg;
+ case AMDIL::GPRI8RegClassID:
+ shiftSize = 24;
+ LShift = AMDIL::SHL_i8;
+ if (signedShift) {
+ RShift = AMDIL::SHR_i8;
+ } else {
+ RShift = AMDIL::USHR_i8;
+ }
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ shiftSize = 24;
+ LShift = AMDIL::SHL_v2i8;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v2i8;
+ } else {
+ RShift = AMDIL::USHR_v2i8;
+ }
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ shiftSize = 24;
+ LShift = AMDIL::SHL_v4i8;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v4i8;
+ } else {
+ RShift = AMDIL::USHR_v4i8;
+ }
+ break;
+ case AMDIL::GPRI16RegClassID:
+ shiftSize = 16;
+ LShift = AMDIL::SHL_i16;
+ if (signedShift) {
+ RShift = AMDIL::SHR_i16;
+ } else {
+ RShift = AMDIL::USHR_i16;
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ shiftSize = 16;
+ LShift = AMDIL::SHL_v2i16;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v2i16;
+ } else {
+ RShift = AMDIL::USHR_v2i16;
+ }
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ shiftSize = 16;
+ LShift = AMDIL::SHL_v4i16;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v4i16;
+ } else {
+ RShift = AMDIL::USHR_v4i16;
+ }
+ break;
+ };
+ uint32_t LoadReg = genVReg(simpleVT);
+ uint32_t tmp1 = genVReg(simpleVT);
+ uint32_t tmp2 = genVReg(simpleVT);
+ generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
+ generateMachineInst(LShift, tmp1, reg, LoadReg);
+ generateMachineInst(RShift, tmp2, tmp1, LoadReg);
+ return tmp2;
+}
+
+MachineOperand
+AMDILTargetLowering::convertToReg(MachineOperand op) const
+{
+ if (op.isReg()) {
+ return op;
+ } else if (op.isImm()) {
+ uint32_t loadReg
+ = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+ generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
+ .addImm(op.getImm());
+ op.ChangeToRegister(loadReg, false);
+ } else if (op.isFPImm()) {
+ uint32_t loadReg
+ = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+ generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
+ .addFPImm(op.getFPImm());
+ op.ChangeToRegister(loadReg, false);
+ } else if (op.isMBB()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isFI()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isCPI()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isJTI()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isGlobal()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isSymbol()) {
+ op.ChangeToRegister(0, false);
+ }/* else if (op.isMetadata()) {
+ op.ChangeToRegister(0, false);
+ }*/
+ return op;
+}
+
+void
+AMDILTargetLowering::generateCMPInstr(
+ MachineInstr *MI,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo& TII)
+const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand CC = MI->getOperand(1);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ int64_t ccCode = CC.getImm();
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ unsigned int opCode = translateToOpcode(ccCode, simpleVT);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock::iterator BBI = MI;
+ setPrivateData(BB, BBI, &DL, &TII);
+ if (!LHS.isReg()) {
+ LHS = convertToReg(LHS);
+ }
+ if (!RHS.isReg()) {
+ RHS = convertToReg(RHS);
+ }
+ switch (ccCode) {
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LT:
+ {
+ uint32_t lhsreg = addExtensionInstructions(
+ LHS.getReg(), true, simpleVT);
+ uint32_t rhsreg = addExtensionInstructions(
+ RHS.getReg(), true, simpleVT);
+ generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+ }
+ break;
+ case AMDILCC::IL_CC_U_EQ:
+ case AMDILCC::IL_CC_U_NE:
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LT:
+ case AMDILCC::IL_CC_D_EQ:
+ case AMDILCC::IL_CC_F_EQ:
+ case AMDILCC::IL_CC_F_OEQ:
+ case AMDILCC::IL_CC_D_OEQ:
+ case AMDILCC::IL_CC_D_NE:
+ case AMDILCC::IL_CC_F_NE:
+ case AMDILCC::IL_CC_F_UNE:
+ case AMDILCC::IL_CC_D_UNE:
+ case AMDILCC::IL_CC_D_GE:
+ case AMDILCC::IL_CC_F_GE:
+ case AMDILCC::IL_CC_D_OGE:
+ case AMDILCC::IL_CC_F_OGE:
+ case AMDILCC::IL_CC_D_LT:
+ case AMDILCC::IL_CC_F_LT:
+ case AMDILCC::IL_CC_F_OLT:
+ case AMDILCC::IL_CC_D_OLT:
+ generateMachineInst(opCode, DST.getReg(),
+ LHS.getReg(), RHS.getReg());
+ break;
+ case AMDILCC::IL_CC_I_GT:
+ case AMDILCC::IL_CC_I_LE:
+ {
+ uint32_t lhsreg = addExtensionInstructions(
+ LHS.getReg(), true, simpleVT);
+ uint32_t rhsreg = addExtensionInstructions(
+ RHS.getReg(), true, simpleVT);
+ generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
+ }
+ break;
+ case AMDILCC::IL_CC_U_GT:
+ case AMDILCC::IL_CC_U_LE:
+ case AMDILCC::IL_CC_F_GT:
+ case AMDILCC::IL_CC_D_GT:
+ case AMDILCC::IL_CC_F_OGT:
+ case AMDILCC::IL_CC_D_OGT:
+ case AMDILCC::IL_CC_F_LE:
+ case AMDILCC::IL_CC_D_LE:
+ case AMDILCC::IL_CC_D_OLE:
+ case AMDILCC::IL_CC_F_OLE:
+ generateMachineInst(opCode, DST.getReg(),
+ RHS.getReg(), LHS.getReg());
+ break;
+ case AMDILCC::IL_CC_F_UGT:
+ case AMDILCC::IL_CC_F_ULE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0],
+ RHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_ULT:
+ case AMDILCC::IL_CC_F_UGE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UGT:
+ case AMDILCC::IL_CC_D_ULE:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0],
+ RHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UGE:
+ case AMDILCC::IL_CC_D_ULT:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_UEQ:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FEQ, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_ONE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FNE, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UEQ:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ case AMDILCC::IL_CC_D_ONE:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ case AMDILCC::IL_CC_F_O:
+ {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FEQ, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_O:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_UO:
+ {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FNE, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UO:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_L_LE:
+ case AMDILCC::IL_CC_L_GE:
+ case AMDILCC::IL_CC_L_EQ:
+ case AMDILCC::IL_CC_L_NE:
+ case AMDILCC::IL_CC_L_LT:
+ case AMDILCC::IL_CC_L_GT:
+ case AMDILCC::IL_CC_UL_LE:
+ case AMDILCC::IL_CC_UL_GE:
+ case AMDILCC::IL_CC_UL_EQ:
+ case AMDILCC::IL_CC_UL_NE:
+ case AMDILCC::IL_CC_UL_LT:
+ case AMDILCC::IL_CC_UL_GT:
+ {
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
+ generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
+ } else {
+ generateLongRelational(MI, opCode);
+ }
+ }
+ break;
+ case AMDILCC::COND_ERROR:
+ assert(0 && "Invalid CC code");
+ break;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+ AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+: TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+ int types[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] =
+ {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] =
+ {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t numTypes = sizeof(types) / sizeof(*types);
+ size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ // These are the current register classes that are
+ // supported
+
+ addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
+ addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
+
+ if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
+ addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
+ addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
+ addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
+ addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
+ setOperationAction(ISD::Constant , MVT::i8 , Legal);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
+ addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
+ addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
+ addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
+ setOperationAction(ISD::Constant , MVT::i16 , Legal);
+ }
+ addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
+ addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
+ addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
+ addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
+ if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
+ addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
+ }
+
+ for (unsigned int x = 0; x < numTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ setOperationAction(ISD::OR, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISDBITCAST, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ }
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+ for (unsigned int x = 0; x < numFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < numIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
+ {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ }
+ setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
+ if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ if (stm->calVersion() < CAL_VERSION_SC_139
+ || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+ }
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::Other, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+ setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+ setOperationAction(ISD::TRAP , MVT::Other , Legal);
+
+ setStackPointerRegisterToSaveRestore(AMDIL::SP);
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setPrefLoopAlignment(16);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+ computeRegisterProperties();
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+#undef numTypes
+#undef numIntTypes
+#undef numVectorTypes
+#undef numFloatTypes
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return 0;
+ case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
+ case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
+ case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
+ case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
+ case AMDILISD::CMOV: return "AMDILISD::CMOV";
+ case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
+ case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
+ case AMDILISD::MAD: return "AMDILISD::MAD";
+ case AMDILISD::UMAD: return "AMDILISD::UMAD";
+ case AMDILISD::CALL: return "AMDILISD::CALL";
+ case AMDILISD::RET: return "AMDILISD::RET";
+ case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
+ case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
+ case AMDILISD::ADD: return "AMDILISD::ADD";
+ case AMDILISD::UMUL: return "AMDILISD::UMUL";
+ case AMDILISD::AND: return "AMDILISD::AND";
+ case AMDILISD::OR: return "AMDILISD::OR";
+ case AMDILISD::NOT: return "AMDILISD::NOT";
+ case AMDILISD::XOR: return "AMDILISD::XOR";
+ case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
+ case AMDILISD::SMAX: return "AMDILISD::SMAX";
+ case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
+ case AMDILISD::MOVE: return "AMDILISD::MOVE";
+ case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
+ case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
+ case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
+ case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
+ case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
+ case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
+ case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
+ case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
+ case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
+ case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
+ case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
+ case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
+ case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
+ case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
+ case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
+ case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
+ case AMDILISD::CMP: return "AMDILISD::CMP";
+ case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
+ case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
+ case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
+ case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
+ case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
+ case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
+ case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+ case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
+ case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
+ case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
+ case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
+ case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
+ case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
+ case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
+ case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
+ case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
+ case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
+ case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
+ case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
+ case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
+ case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
+ case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
+ case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
+ case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
+ case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
+ case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
+ case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
+ case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
+ case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
+ case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
+ case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
+ case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
+ case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
+ case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
+ case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
+ case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
+ case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
+ case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
+ case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
+ case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
+ case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
+ case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
+ case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
+ case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
+ case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
+ case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
+ case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
+ case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
+ case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
+ case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
+ case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
+ case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
+ case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
+ case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
+ case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
+ case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
+ case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
+ case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
+ case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
+ case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
+ case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
+ case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
+ case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
+ case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
+ case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
+ case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
+ case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
+ case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
+ case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
+ case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
+ case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
+ case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
+ case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
+ case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
+ case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
+ case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
+ case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
+ case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
+ case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
+ case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
+ case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
+ case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
+ case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
+ case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
+ case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
+ case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
+ case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
+ case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
+ case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
+ case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
+ case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
+ case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
+ case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
+ case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
+ case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
+ case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
+ case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
+ case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
+ case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
+ case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
+ case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
+ case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
+ case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
+ case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
+ case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
+ case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
+ case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
+ case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
+
+ };
+}
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
+ || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
+ return false;
+ }
+ bool bitCastToInt = false;
+ unsigned IntNo;
+ bool isRet = true;
+ const AMDILSubtarget *STM = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ switch (Intrinsic) {
+ default: return false; // Don't custom lower most intrinsics.
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
+ IntNo = AMDILISD::ATOM_G_ADD; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
+ IntNo = AMDILISD::ATOM_L_ADD; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
+ IntNo = AMDILISD::ATOM_R_ADD; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
+ IntNo = AMDILISD::ATOM_G_AND; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_AND_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
+ IntNo = AMDILISD::ATOM_L_AND; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_AND_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
+ IntNo = AMDILISD::ATOM_R_AND; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_AND_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
+ IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
+ IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
+ IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
+ IntNo = AMDILISD::ATOM_G_MAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
+ IntNo = AMDILISD::ATOM_G_UMAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
+ IntNo = AMDILISD::ATOM_L_MAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
+ IntNo = AMDILISD::ATOM_L_UMAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
+ IntNo = AMDILISD::ATOM_R_MAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
+ IntNo = AMDILISD::ATOM_R_UMAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
+ IntNo = AMDILISD::ATOM_G_MIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
+ IntNo = AMDILISD::ATOM_G_UMIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
+ IntNo = AMDILISD::ATOM_L_MIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
+ IntNo = AMDILISD::ATOM_L_UMIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
+ IntNo = AMDILISD::ATOM_R_MIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
+ IntNo = AMDILISD::ATOM_R_UMIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
+ IntNo = AMDILISD::ATOM_G_OR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_OR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
+ IntNo = AMDILISD::ATOM_L_OR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_OR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
+ IntNo = AMDILISD::ATOM_R_OR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_OR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
+ IntNo = AMDILISD::ATOM_G_SUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
+ IntNo = AMDILISD::ATOM_L_SUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
+ IntNo = AMDILISD::ATOM_R_SUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
+ IntNo = AMDILISD::ATOM_G_RSUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
+ IntNo = AMDILISD::ATOM_L_RSUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
+ IntNo = AMDILISD::ATOM_R_RSUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
+ IntNo = AMDILISD::ATOM_G_XCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
+ IntNo = AMDILISD::ATOM_L_XCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
+ IntNo = AMDILISD::ATOM_R_XCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
+ IntNo = AMDILISD::ATOM_G_XOR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
+ IntNo = AMDILISD::ATOM_L_XOR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
+ IntNo = AMDILISD::ATOM_R_XOR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
+ IntNo = AMDILISD::APPEND_ALLOC; break;
+ case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
+ isRet = false;
+ IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_append_consume_i32:
+ IntNo = AMDILISD::APPEND_CONSUME; break;
+ case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
+ isRet = false;
+ IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
+ };
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
+ stm->getKernelManager());
+ KM->setOutputInst();
+
+ Info.opc = IntNo;
+ Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
+ Info.ptrVal = I.getOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.vol = true;
+ Info.readMem = isRet;
+ Info.writeMem = true;
+ return true;
+}
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const
+{
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case AMDILISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+ DAG.ComputeMaskedBits(
+ Op.getOperand(0),
+ KnownZero2,
+ KnownOne2
+ );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ };
+}
+
+// This is the function that determines which calling convention should
+// be used. Currently there is only one calling convention
+CCAssignFn*
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+ //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ return CC_AMDIL32;
+}
+
+// LowerCallResult - Lower the result values of an ISD::CALL into the
+// appropriate copies out of appropriate physical registers. This assumes that
+// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+// being lowered. The returns a SDNode with the same number of values as the
+// ISD::CALL.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+ SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const
+{
+ // Assign locations to each value returned by this call
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ if (RVLocs[i].isRegLoc()) {
+ Chain = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ RVLocs[i].getLocReg(),
+ CopyVT,
+ InFlag
+ ).getValue(1);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+ }
+
+ return Chain;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AMDILTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr *MI, MachineBasicBlock *BB) const
+{
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::CMP);
+ generateCMPInstr(MI, BB, TII);
+ MI->eraseFromParent();
+ break;
+ default:
+ break;
+ }
+ return BB;
+}
+
+// Recursively assign SDNodeOrdering to any unordered nodes
+// This is necessary to maintain source ordering of instructions
+// under -O0 to avoid odd-looking "skipping around" issues.
+ static const SDValue
+Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+{
+ if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
+ DAG.AssignOrdering( New.getNode(), order );
+ for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
+ Ordered( DAG, order, New.getOperand(i) );
+ }
+ return New;
+}
+
+#define LOWER(A) \
+ case ISD:: A: \
+return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ LOWER(GlobalAddress);
+ LOWER(JumpTable);
+ LOWER(ConstantPool);
+ LOWER(ExternalSymbol);
+ LOWER(FP_TO_SINT);
+ LOWER(FP_TO_UINT);
+ LOWER(SINT_TO_FP);
+ LOWER(UINT_TO_FP);
+ LOWER(ADD);
+ LOWER(MUL);
+ LOWER(SUB);
+ LOWER(FDIV);
+ LOWER(SDIV);
+ LOWER(SREM);
+ LOWER(UDIV);
+ LOWER(UREM);
+ LOWER(BUILD_VECTOR);
+ LOWER(INSERT_VECTOR_ELT);
+ LOWER(EXTRACT_VECTOR_ELT);
+ LOWER(EXTRACT_SUBVECTOR);
+ LOWER(SCALAR_TO_VECTOR);
+ LOWER(CONCAT_VECTORS);
+ LOWER(AND);
+ LOWER(OR);
+ LOWER(SELECT);
+ LOWER(SELECT_CC);
+ LOWER(SETCC);
+ LOWER(SIGN_EXTEND_INREG);
+ LOWER(BITCAST);
+ LOWER(DYNAMIC_STACKALLOC);
+ LOWER(BRCOND);
+ LOWER(BR_CC);
+ LOWER(FP_ROUND);
+ }
+ return Op;
+}
+
+int
+AMDILTargetLowering::getVarArgsFrameOffset() const
+{
+ return VarArgsFrameOffset;
+}
+#undef LOWER
+
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = Op;
+ const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *G = GADN->getGlobal();
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ const AMDILGlobalManager *GM = stm->getGlobalManager();
+ DebugLoc DL = Op.getDebugLoc();
+ int64_t base_offset = GADN->getOffset();
+ int32_t arrayoffset = GM->getArrayOffset(G->getName());
+ int32_t constoffset = GM->getConstOffset(G->getName());
+ if (arrayoffset != -1) {
+ DST = DAG.getConstant(arrayoffset, MVT::i32);
+ DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
+ DST, DAG.getConstant(base_offset, MVT::i32));
+ } else if (constoffset != -1) {
+ if (GM->getConstHWBit(G->getName())) {
+ DST = DAG.getConstant(constoffset, MVT::i32);
+ DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
+ DST, DAG.getConstant(base_offset, MVT::i32));
+ } else {
+ SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
+ SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32);
+ DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg,
+ DAG.getConstant(base_offset, MVT::i32));
+ DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg);
+ }
+ } else {
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV) {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ } else {
+ if (GV->hasInitializer()) {
+ const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+ DST = DAG.getConstantFP(CF->getValueAPF(),
+ Op.getValueType());
+ } else if (dyn_cast<ConstantAggregateZero>(C)) {
+ EVT VT = Op.getValueType();
+ if (VT.isInteger()) {
+ DST = DAG.getConstant(0, VT);
+ } else {
+ DST = DAG.getConstantFP(0, VT);
+ }
+ } else {
+ assert(!"lowering this type of Global Address "
+ "not implemented yet!");
+ C->dump();
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ } else {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+ return Result;
+}
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ EVT PtrVT = Op.getValueType();
+ SDValue Result;
+ if (CP->isMachineConstantPoolEntry()) {
+ Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ } else {
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ }
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
+ return Result;
+}
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+/// TODO: isVarArg, hasStructRet, isMemReg
+ SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const
+{
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ AMDILMachineFunctionInfo *FuncInfo
+ = MF.getInfo<AMDILMachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ //const Function *Fn = MF.getFunction();
+ //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+ CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // When more calling conventions are added, they need to be chosen here
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+ SDValue StackPtr;
+
+ //unsigned int FirstStackArgLoc = 0;
+
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFromType(
+ RegVT.getSimpleVT().SimpleTy);
+
+ unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ Reg,
+ RegVT);
+ // If this is an 8 or 16-bit value, it is really passed
+ // promoted to 32 bits. Insert an assert[sz]ext to capture
+ // this, then truncate to the right size.
+
+ if (VA.getLocInfo() == CCValAssign::SExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertSext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertZext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ }
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ ArgValue = DAG.getNode(
+ ISD::TRUNCATE,
+ dl,
+ VA.getValVT(),
+ ArgValue);
+ }
+ // Add the value to the list of arguments
+ // to be passed in registers
+ InVals.push_back(ArgValue);
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }
+ } else if(VA.isMemLoc()) {
+ InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+ dl, DAG, VA, MFI, i));
+ } else {
+ assert(0 && "found a Value Assign that is "
+ "neither a register or a memory location");
+ }
+ }
+ /*if (hasStructRet) {
+ assert(0 && "Has struct return is not yet implemented");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }*/
+
+ unsigned int StackSize = CCInfo.getNextStackOffset();
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+ }
+ // This needs to be changed to non-zero if the return function needs
+ // to pop bytes
+ FuncInfo->setBytesToPopOnReturn(StackSize);
+ return Chain;
+}
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+ assert(0 && "MemCopy does not exist yet");
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain,
+ Src.getDebugLoc(),
+ Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*IsVol=*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const
+{
+ unsigned int LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD,
+ dl,
+ getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ } else {
+ PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+ }
+ return PtrOff;
+}
+/// LowerCAL - functions arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
+ bool& isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const
+{
+ isTailCall = false;
+ MachineFunction& MF = DAG.getMachineFunction();
+ // FIXME: DO we need to handle fast calling conventions and tail call
+ // optimizations?? X86/PPC ISelLowering
+ /*bool hasStructRet = (TheCall->getNumArgs())
+ ? TheCall->getArgFlags(0).device()->isSRet()
+ : false;*/
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // Analyize the calling operands, but need to change
+ // if we have more than one calling convetion
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ unsigned int NumBytes = CCInfo.getNextStackOffset();
+ if (isTailCall) {
+ assert(isTailCall && "Tail Call not handled yet!");
+ // See X86/PPC ISelLowering
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ //unsigned int FirstStacArgLoc = 0;
+ //int LastArgStackLoc = 0;
+
+ // Walk the register/memloc assignments, insert copies/loads
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
+ // Arguments start after the 5 first operands of ISD::CALL
+ SDValue Arg = OutVals[i];
+ //Promote the value if needed
+ switch(VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND,
+ dl,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND,
+ dl,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND,
+ dl,
+ VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (VA.isMemLoc()) {
+ // Create the frame index object for this incoming parameter
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ } else {
+ assert(0 && "Not a Reg/Mem Loc, major error!");
+ }
+ }
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor,
+ dl,
+ MVT::Other,
+ &MemOpChains[0],
+ MemOpChains.size());
+ }
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain,
+ dl,
+ RegsToPass[i].first,
+ RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress/
+ // TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+ else if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1708
+ }
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
+ SmallVector<SDValue, 8> Ops;
+
+ if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1721
+ }
+ // If this is a direct call, pass the chain and the callee
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+
+ if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1739
+ }
+
+ // Add argument registers to the end of the list so that they are known
+ // live into the call
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(
+ RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ // Emit Tail Call
+ if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1762
+ }
+
+ Chain = DAG.getNode(AMDILISD::CALL,
+ dl,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node
+ Chain = DAG.getCALLSEQ_END(
+ Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ // Handle result values, copying them out of physregs into vregs that
+ // we return
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
+}
+static void checkMADType(
+ SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
+{
+ bool globalLoadStore = false;
+ is24bitMAD = false;
+ is32bitMAD = false;
+ return;
+ assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
+ "this to work correctly!");
+ if (Op.getNode()->use_empty()) {
+ return;
+ }
+ for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
+ nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
+ SDNode *ptr = *nBegin;
+ const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
+ // If we are not a LSBaseSDNode then we don't do this
+ // optimization.
+ // If we are a LSBaseSDNode, but the op is not the offset
+ // or base pointer, then we don't do this optimization
+ // (i.e. we are the value being stored)
+ if (!lsNode ||
+ (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
+ return;
+ }
+ const PointerType *PT =
+ dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
+ unsigned as = PT->getAddressSpace();
+ switch(as) {
+ default:
+ globalLoadStore = true;
+ case AMDILAS::PRIVATE_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::CONSTANT_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::LOCAL_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::REGION_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ }
+ }
+ if (globalLoadStore) {
+ is32bitMAD = true;
+ } else {
+ is24bitMAD = true;
+ }
+}
+
+SDValue
+AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ bool isVec = OVT.isVector();
+ if (OVT.getScalarType() == MVT::i64) {
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i64) {
+ INTTY = MVT::v2i32;
+ }
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
+ && INTTY == MVT::i32) {
+ DST = DAG.getNode(AMDILISD::ADD,
+ DL,
+ OVT,
+ LHS, RHS);
+ } else {
+ SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+ // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+ LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+ RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+ LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+ RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+ INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
+ INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
+ SDValue cmp;
+ cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ INTLO, RHSLO);
+ cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
+ INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+ INTLO, INTHI);
+ }
+ } else {
+ if (LHS.getOpcode() == ISD::FrameIndex ||
+ RHS.getOpcode() == ISD::FrameIndex) {
+ DST = DAG.getNode(AMDILISD::ADDADDR,
+ DL,
+ OVT,
+ LHS, RHS);
+ } else {
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+ && LHS.getNumOperands()
+ && RHS.getNumOperands()) {
+ bool is24bitMAD = false;
+ bool is32bitMAD = false;
+ const ConstantSDNode *LHSConstOpCode =
+ dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
+ const ConstantSDNode *RHSConstOpCode =
+ dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
+ if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
+ || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
+ || LHS.getOpcode() == ISD::MUL
+ || RHS.getOpcode() == ISD::MUL) {
+ SDValue Op1, Op2, Op3;
+ // FIXME: Fix this so that it works for unsigned 24bit ops.
+ if (LHS.getOpcode() == ISD::MUL) {
+ Op1 = LHS.getOperand(0);
+ Op2 = LHS.getOperand(1);
+ Op3 = RHS;
+ } else if (RHS.getOpcode() == ISD::MUL) {
+ Op1 = RHS.getOperand(0);
+ Op2 = RHS.getOperand(1);
+ Op3 = LHS;
+ } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
+ Op1 = LHS.getOperand(0);
+ Op2 = DAG.getConstant(
+ 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
+ Op3 = RHS;
+ } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
+ Op1 = RHS.getOperand(0);
+ Op2 = DAG.getConstant(
+ 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
+ Op3 = LHS;
+ }
+ checkMADType(Op, stm, is24bitMAD, is32bitMAD);
+ // We can possibly do a MAD transform!
+ if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+ uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
+ SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+ DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+ DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
+ Op1, Op2, Op3);
+ } else if(is32bitMAD) {
+ SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+ DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+ DL, Tys, DAG.getEntryNode(),
+ DAG.getConstant(
+ AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
+ Op1, Op2, Op3);
+ }
+ }
+ }
+ DST = DAG.getNode(AMDILISD::ADD,
+ DL,
+ OVT,
+ LHS, RHS);
+ }
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
+ uint32_t bits) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY = Op.getValueType();
+ EVT FPTY;
+ if (INTTY.isVector()) {
+ FPTY = EVT(MVT::getVectorVT(MVT::f32,
+ INTTY.getVectorNumElements()));
+ } else {
+ FPTY = EVT(MVT::f32);
+ }
+ /* static inline uint
+ __clz_Nbit(uint x)
+ {
+ int xor = 0x3f800000U | x;
+ float tp = as_float(xor);
+ float t = tp + -1.0f;
+ uint tint = as_uint(t);
+ int cmp = (x != 0);
+ uint tsrc = tint >> 23;
+ uint tmask = tsrc & 0xffU;
+ uint cst = (103 + N)U - tmask;
+ return cmp ? cst : N;
+ }
+ */
+ assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
+ && "genCLZu16 only works on 32bit types");
+ // uint x = Op
+ SDValue x = Op;
+ // xornode = 0x3f800000 | x
+ SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
+ DAG.getConstant(0x3f800000, INTTY), x);
+ // float tp = as_float(xornode)
+ SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
+ // float t = tp + -1.0f
+ SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
+ DAG.getConstantFP(-1.0f, FPTY));
+ // uint tint = as_uint(t)
+ SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
+ // int cmp = (x != 0)
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
+ DAG.getConstant(0, INTTY));
+ // uint tsrc = tint >> 23
+ SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
+ DAG.getConstant(23, INTTY));
+ // uint tmask = tsrc & 0xFF
+ SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
+ DAG.getConstant(0xFFU, INTTY));
+ // uint cst = (103 + bits) - tmask
+ SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
+ DAG.getConstant((103U + bits), INTTY), tmask);
+ // return cmp ? cst : N
+ cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
+ DAG.getConstant(bits, INTTY));
+ return cst;
+}
+
+SDValue
+AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = SDValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY = Op.getValueType();
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ //__clz_32bit(uint u)
+ //{
+ // int z = __amdil_ffb_hi(u) ;
+ // return z < 0 ? 32 : z;
+ // }
+ // uint u = op
+ SDValue u = Op;
+ // int z = __amdil_ffb_hi(u)
+ SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
+ // int cmp = z < 0
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ z, DAG.getConstant(0, INTTY));
+ // return cmp ? 32 : z
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
+ DAG.getConstant(32, INTTY), z);
+ } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // static inline uint
+ //__clz_32bit(uint x)
+ //{
+ // uint zh = __clz_16bit(x >> 16);
+ // uint zl = __clz_16bit(x & 0xffffU);
+ // return zh == 16U ? 16U + zl : zh;
+ //}
+ // uint x = Op
+ SDValue x = Op;
+ // uint xs16 = x >> 16
+ SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
+ DAG.getConstant(16, INTTY));
+ // uint zh = __clz_16bit(xs16)
+ SDValue zh = genCLZuN(xs16, DAG, 16);
+ // uint xa16 = x & 0xFFFF
+ SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
+ DAG.getConstant(0xFFFFU, INTTY));
+ // uint zl = __clz_16bit(xa16)
+ SDValue zl = genCLZuN(xa16, DAG, 16);
+ // uint cmp = zh == 16U
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zh, DAG.getConstant(16U, INTTY));
+ // uint zl16 = zl + 16
+ SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
+ DAG.getConstant(16, INTTY), zl);
+ // return cmp ? zl16 : zh
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+ cmp, zl16, zh);
+ } else {
+ assert(0 && "Attempting to generate a CLZ function with an"
+ " unknown graphics card");
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = SDValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY;
+ EVT LONGTY = Op.getValueType();
+ bool isVec = LONGTY.isVector();
+ if (isVec) {
+ INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
+ .getVectorNumElements()));
+ } else {
+ INTTY = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ // Evergreen:
+ // static inline uint
+ // __clz_u64(ulong x)
+ // {
+ //uint zhi = __clz_32bit((uint)(x >> 32));
+ //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
+ //return zhi == 32U ? 32U + zlo : zhi;
+ //}
+ //ulong x = op
+ SDValue x = Op;
+ // uint xhi = x >> 32
+ SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xlo = x & 0xFFFFFFFF
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
+ // uint zhi = __clz_32bit(xhi)
+ SDValue zhi = genCLZu32(xhi, DAG);
+ // uint zlo = __clz_32bit(xlo)
+ SDValue zlo = genCLZu32(xlo, DAG);
+ // uint cmp = zhi == 32
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhi, DAG.getConstant(32U, INTTY));
+ // uint zlop32 = 32 + zlo
+ SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
+ DAG.getConstant(32U, INTTY), zlo);
+ // return cmp ? zlop32: zhi
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
+ } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // HD4XXX:
+ // static inline uint
+ //__clz_64bit(ulong x)
+ //{
+ //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
+ //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
+ //uint zl = __clz_23bit((uint)x & 0x7fffffU);
+ //uint r = zh == 18U ? 18U + zm : zh;
+ //return zh + zm == 41U ? 41U + zl : r;
+ //}
+ //ulong x = Op
+ SDValue x = Op;
+ // ulong xs46 = x >> 46
+ SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(46, LONGTY));
+ // uint ixs46 = (uint)xs46
+ SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
+ // ulong xs23 = x >> 23
+ SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(23, LONGTY));
+ // uint ixs23 = (uint)xs23
+ SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
+ // uint xs23m23 = ixs23 & 0x7FFFFF
+ SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint ix = (uint)x
+ SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xm23 = ix & 0x7FFFFF
+ SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint zh = __clz_23bit(ixs46)
+ SDValue zh = genCLZuN(ixs46, DAG, 23);
+ // uint zm = __clz_23bit(xs23m23)
+ SDValue zm = genCLZuN(xs23m23, DAG, 23);
+ // uint zl = __clz_23bit(xm23)
+ SDValue zl = genCLZuN(xm23, DAG, 23);
+ // uint zhm5 = zh - 5
+ SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
+ DAG.getConstant(-5U, INTTY));
+ SDValue const18 = DAG.getConstant(18, INTTY);
+ SDValue const41 = DAG.getConstant(41, INTTY);
+ // uint cmp1 = zh = 18
+ SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5, const18);
+ // uint zhm5zm = zhm5 + zh
+ SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
+ // uint cmp2 = zhm5zm == 41
+ SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5zm, const41);
+ // uint zmp18 = zhm5 + 18
+ SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
+ // uint zlp41 = zl + 41
+ SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
+ // uint r = cmp1 ? zmp18 : zh
+ SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+ cmp1, zmp18, zhm5);
+ // return cmp2 ? zlp41 : r
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
+ } else {
+ assert(0 && "Attempting to generate a CLZ function with an"
+ " unknown graphics card");
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ EVT INTVT;
+ EVT LONGVT;
+ SDValue DST;
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
+ .getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
+ .getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // unsigned version:
+ // uint uhi = (uint)(d * 0x1.0p-32);
+ // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
+ // return as_ulong2((uint2)(ulo, uhi));
+ //
+ // signed version:
+ // double ad = fabs(d);
+ // long l = unsigned_version(ad);
+ // long nl = -l;
+ // return d == ad ? l : nl;
+ SDValue d = RHS;
+ if (includeSign) {
+ d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
+ }
+ SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
+ DAG.getConstantFP(0x2f800000, RHSVT));
+ SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
+ SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
+ ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
+ DAG.getConstantFP(0xcf800000, RHSVT), d);
+ SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
+ SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
+ if (includeSign) {
+ SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
+ SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
+ RHS, d);
+ l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
+ }
+ DST = l;
+ } else {
+ /*
+ __attribute__((always_inline)) long
+ cast_f64_to_i64(double d)
+ {
+ // Convert d in to 32-bit components
+ long x = as_long(d);
+ xhi = LCOMPHI(x);
+ xlo = LCOMPLO(x);
+
+ // Generate 'normalized' mantissa
+ mhi = xhi | 0x00100000; // hidden bit
+ mhi <<= 11;
+ temp = xlo >> (32 - 11);
+ mhi |= temp
+ mlo = xlo << 11;
+
+ // Compute shift right count from exponent
+ e = (xhi >> (52-32)) & 0x7ff;
+ sr = 1023 + 63 - e;
+ srge64 = sr >= 64;
+ srge32 = sr >= 32;
+
+ // Compute result for 0 <= sr < 32
+ rhi0 = mhi >> (sr &31);
+ rlo0 = mlo >> (sr &31);
+ temp = mhi << (32 - sr);
+ temp |= rlo0;
+ rlo0 = sr ? temp : rlo0;
+
+ // Compute result for 32 <= sr
+ rhi1 = 0;
+ rlo1 = srge64 ? 0 : rhi0;
+
+ // Pick between the 2 results
+ rhi = srge32 ? rhi1 : rhi0;
+ rlo = srge32 ? rlo1 : rlo0;
+
+ // Optional saturate on overflow
+ srlt0 = sr < 0;
+ rhi = srlt0 ? MAXVALUE : rhi;
+ rlo = srlt0 ? MAXVALUE : rlo;
+
+ // Create long
+ res = LCREATE( rlo, rhi );
+
+ // Deal with sign bit (ignoring whether result is signed or unsigned value)
+ if (includeSign) {
+ sign = ((signed int) xhi) >> 31; fill with sign bit
+ sign = LCREATE( sign, sign );
+ res += sign;
+ res ^= sign;
+ }
+
+ return res;
+ }
+ */
+ SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+ SDValue c32 = DAG.getConstant( 32, INTVT );
+
+ // Convert d in to 32-bit components
+ SDValue d = RHS;
+ SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Generate 'normalized' mantissa
+ SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+ xhi, DAG.getConstant( 0x00100000, INTVT ) );
+ mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+ SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+ xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+ mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+ SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
+
+ // Compute shift right count from exponent
+ SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+ xhi, DAG.getConstant( 52-32, INTVT ) );
+ e = DAG.getNode( ISD::AND, DL, INTVT,
+ e, DAG.getConstant( 0x7ff, INTVT ) );
+ SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 1023 + 63, INTVT ), e );
+ SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(64, INTVT));
+ SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(32, INTVT));
+
+ // Compute result for 0 <= sr < 32
+ SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+ SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
+ temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
+ temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
+ temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
+ rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
+
+ // Compute result for 32 <= sr
+ SDValue rhi1 = DAG.getConstant( 0, INTVT );
+ SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge64, rhi1, rhi0 );
+
+ // Pick between the 2 results
+ SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, rhi1, rhi0 );
+ SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, rlo1, rlo0 );
+
+ // Create long
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+
+ // Deal with sign bit
+ if (includeSign) {
+ SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+ xhi, DAG.getConstant( 31, INTVT ) );
+ sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
+ res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
+ res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
+ }
+ DST = res;
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ EVT INTVT;
+ EVT LONGVT;
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ /*
+ __attribute__((always_inline)) int
+ cast_f64_to_[u|i]32(double d)
+ {
+ // Convert d in to 32-bit components
+ long x = as_long(d);
+ xhi = LCOMPHI(x);
+ xlo = LCOMPLO(x);
+
+ // Generate 'normalized' mantissa
+ mhi = xhi | 0x00100000; // hidden bit
+ mhi <<= 11;
+ temp = xlo >> (32 - 11);
+ mhi |= temp
+
+ // Compute shift right count from exponent
+ e = (xhi >> (52-32)) & 0x7ff;
+ sr = 1023 + 31 - e;
+ srge32 = sr >= 32;
+
+ // Compute result for 0 <= sr < 32
+ res = mhi >> (sr &31);
+ res = srge32 ? 0 : res;
+
+ // Optional saturate on overflow
+ srlt0 = sr < 0;
+ res = srlt0 ? MAXVALUE : res;
+
+ // Deal with sign bit (ignoring whether result is signed or unsigned value)
+ if (includeSign) {
+ sign = ((signed int) xhi) >> 31; fill with sign bit
+ res += sign;
+ res ^= sign;
+ }
+
+ return res;
+ }
+ */
+ SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+
+ // Convert d in to 32-bit components
+ SDValue d = RHS;
+ SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Generate 'normalized' mantissa
+ SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+ xhi, DAG.getConstant( 0x00100000, INTVT ) );
+ mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+ SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+ xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+ mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+
+ // Compute shift right count from exponent
+ SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+ xhi, DAG.getConstant( 52-32, INTVT ) );
+ e = DAG.getNode( ISD::AND, DL, INTVT,
+ e, DAG.getConstant( 0x7ff, INTVT ) );
+ SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 1023 + 31, INTVT ), e );
+ SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(32, INTVT));
+
+ // Compute result for 0 <= sr < 32
+ SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+ res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, DAG.getConstant(0,INTVT), res );
+
+ // Deal with sign bit
+ if (includeSign) {
+ SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+ xhi, DAG.getConstant( 31, INTVT ) );
+ res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
+ res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
+ }
+ return res;
+}
+SDValue
+AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue DST;
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (RST == MVT::f64 && RHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+ DST, op, DAG.getTargetConstant(x, MVT::i32));
+ }
+ }
+ } else {
+ if (RST == MVT::f64
+ && LST == MVT::i32) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ DST = genf64toi32(RHS, DAG, true);
+ }
+ } else if (RST == MVT::f64
+ && LST == MVT::i64) {
+ DST = genf64toi64(RHS, DAG, true);
+ } else if (RST == MVT::f64
+ && (LST == MVT::i8 || LST == MVT::i16)) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
+ } else {
+ SDValue ToInt = genf64toi32(RHS, DAG, true);
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+ }
+
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST;
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ DebugLoc DL = Op.getDebugLoc();
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (RST == MVT::f64 && RHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+ DST, op, DAG.getTargetConstant(x, MVT::i32));
+ }
+
+ }
+ } else {
+ if (RST == MVT::f64
+ && LST == MVT::i32) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ DST = genf64toi32(RHS, DAG, false);
+ }
+ } else if (RST == MVT::f64
+ && LST == MVT::i64) {
+ DST = genf64toi64(RHS, DAG, false);
+ } else if (RST == MVT::f64
+ && (LST == MVT::i8 || LST == MVT::i16)) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
+ } else {
+ SDValue ToInt = genf64toi32(RHS, DAG, false);
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+ }
+
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
+ SelectionDAG &DAG) const
+{
+ EVT RHSVT = RHS.getValueType();
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT INTVT;
+ EVT LONGVT;
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ SDValue x = RHS;
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (stm->calVersion() >= CAL_VERSION_SC_135) {
+ // unsigned x = RHS;
+ // ulong xd = (ulong)(0x4330_0000 << 32) | x;
+ // double d = as_double( xd );
+ // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
+ DAG.getConstant( 0x43300000, INTVT ) );
+ SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+ SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
+ DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
+ return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
+ } else {
+ SDValue clz = genCLZu32(x, DAG);
+
+ // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
+ // Except for an input 0... which requires a 0 exponent
+ SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( (1023+31), INTVT), clz );
+ exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
+
+ // Normalize frac
+ SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
+
+ // Eliminate hidden bit
+ rhi = DAG.getNode( ISD::AND, DL, INTVT,
+ rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+ // Pack exponent and frac
+ SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
+ rhi, DAG.getConstant( (32 - 11), INTVT ) );
+ rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+ rhi, DAG.getConstant( 11, INTVT ) );
+ exp = DAG.getNode( ISD::SHL, DL, INTVT,
+ exp, DAG.getConstant( 20, INTVT ) );
+ rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+ // Convert 2 x 32 in to 1 x 64, then to double precision float type
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+ return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+ }
+}
+SDValue
+AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
+ SelectionDAG &DAG) const
+{
+ EVT RHSVT = RHS.getValueType();
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT INTVT;
+ EVT LONGVT;
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ INTVT = EVT(MVT::i32);
+ }
+ LONGVT = RHSVT;
+ SDValue x = RHS;
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // double dhi = (double)(as_uint2(x).y);
+ // double dlo = (double)(as_uint2(x).x);
+ // return mad(dhi, 0x1.0p+32, dlo)
+ SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
+ dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
+ SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
+ dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
+ return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
+ DAG.getConstantFP(0x4f800000, LHSVT), dlo);
+ } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
+ // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
+ // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
+ // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
+ SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
+ SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
+ SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
+ SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
+ DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
+ hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
+ return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
+
+ } else {
+ SDValue clz = genCLZu64(x, DAG);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
+ SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( (1023+63), INTVT), clz );
+ SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
+ exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ mash, exp, mash ); // exp = exp, or 0 if input was 0
+
+ // Normalize frac
+ SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
+ clz, DAG.getConstant( 31, INTVT ) );
+ SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 32, INTVT ), clz31 );
+ SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
+ SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
+ t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
+ SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
+ SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+ SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+ SDValue rlo2 = DAG.getConstant( 0, INTVT );
+ SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
+ clz, DAG.getConstant( 32, INTVT ) );
+ SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ clz32, rhi2, rhi1 );
+ SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ clz32, rlo2, rlo1 );
+
+ // Eliminate hidden bit
+ rhi = DAG.getNode( ISD::AND, DL, INTVT,
+ rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+ // Save bits needed to round properly
+ SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
+ rlo, DAG.getConstant( 0x7ff, INTVT ) );
+
+ // Pack exponent and frac
+ rlo = DAG.getNode( ISD::SRL, DL, INTVT,
+ rlo, DAG.getConstant( 11, INTVT ) );
+ SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
+ rhi, DAG.getConstant( (32 - 11), INTVT ) );
+ rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
+ rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+ rhi, DAG.getConstant( 11, INTVT ) );
+ exp = DAG.getNode( ISD::SHL, DL, INTVT,
+ exp, DAG.getConstant( 20, INTVT ) );
+ rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+ // Compute rounding bit
+ SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
+ rlo, DAG.getConstant( 1, INTVT ) );
+ SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
+ round, DAG.getConstant( 0x3ff, INTVT ) );
+ grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
+ grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
+ grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
+ round = DAG.getNode( ISD::SRL, DL, INTVT,
+ round, DAG.getConstant( 10, INTVT ) );
+ round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
+
+ // Add rounding bit
+ SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
+ round, DAG.getConstant( 0, INTVT ) );
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+ res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
+ return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+ }
+}
+SDValue
+AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue DST;
+ EVT INTVT;
+ EVT LONGVT;
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (LST == MVT::f64 && LHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ DST = Op;
+ for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+ op, DAG.getTargetConstant(x, MVT::i32));
+ }
+
+ }
+ } else {
+
+ if (RST == MVT::i32
+ && LST == MVT::f64) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ DST = genu32tof64(RHS, LHSVT, DAG);
+ }
+ } else if (RST == MVT::i64
+ && LST == MVT::f64) {
+ DST = genu64tof64(RHS, LHSVT, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT INTVT;
+ EVT LONGVT;
+ SDValue DST;
+ bool isVec = RHSVT.isVector();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (LST == MVT::f64 && LHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+ op, DAG.getTargetConstant(x, MVT::i32));
+ }
+
+ }
+ } else {
+
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ if ((RST == MVT::i32 || RST == MVT::i64)
+ && LST == MVT::f64) {
+ if (RST == MVT::i32) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ return DST;
+ }
+ }
+ SDValue c31 = DAG.getConstant( 31, INTVT );
+ SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
+
+ SDValue S; // Sign, as 0 or -1
+ SDValue Sbit; // Sign bit, as one bit, MSB only.
+ if (RST == MVT::i32) {
+ Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
+ S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
+ } else { // 64-bit case... SRA of 64-bit values is slow
+ SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
+ Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
+ SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
+ S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
+ }
+
+ // get abs() of input value, given sign as S (0 or -1)
+ // SpI = RHS + S
+ SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
+ // SpIxS = SpI ^ S
+ SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
+
+ // Convert unsigned value to double precision
+ SDValue R;
+ if (RST == MVT::i32) {
+ // r = cast_u32_to_f64(SpIxS)
+ R = genu32tof64(SpIxS, LHSVT, DAG);
+ } else {
+ // r = cast_u64_to_f64(SpIxS)
+ R = genu64tof64(SpIxS, LHSVT, DAG);
+ }
+
+ // drop in the sign bit
+ SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
+ SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
+ SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
+ thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
+ t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
+ DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ bool isVec = RHS.getValueType().isVector();
+ if (OVT.getScalarType() == MVT::i64) {
+ /*const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());*/
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i64) {
+ INTTY = MVT::v2i32;
+ }
+ SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+ // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+ LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+ RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+ LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+ RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+ INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
+ INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
+ //TODO: need to use IBORROW on HD5XXX and later hardware
+ SDValue cmp;
+ if (OVT == MVT::i64) {
+ cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ LHSLO, RHSLO);
+ } else {
+ SDValue cmplo;
+ SDValue cmphi;
+ SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
+ SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
+ SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
+ SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
+ cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ LHSRLO, RHSRLO);
+ cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ LHSRHI, RHSRHI);
+ cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
+ cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
+ cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
+ }
+ INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+ INTLO, INTHI);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::f64) {
+ DST = LowerFDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::f32) {
+ DST = LowerFDIV32(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerUDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerUDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerUDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerUREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerUREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerUREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerUREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ bool isVec = OVT.isVector();
+ if (OVT.getScalarType() != MVT::i64)
+ {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
+ // TODO: This needs to be turned into a tablegen pattern
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i64) {
+ INTTY = MVT::v2i32;
+ }
+ // mul64(h1, l1, h0, l0)
+ SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY, LHS);
+ SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY, LHS);
+ SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY, RHS);
+ SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY, RHS);
+ // MULLO_UINT_1 r1, h0, l1
+ SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
+ DL,
+ INTTY, RHSHI, LHSLO);
+ // MULLO_UINT_1 r2, h1, l0
+ SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
+ DL,
+ INTTY, RHSLO, LHSHI);
+ // ADD_INT hr, r1, r2
+ SDValue ADDHI = DAG.getNode(ISD::ADD,
+ DL,
+ INTTY, RHILLO, RLOHHI);
+ // MULHI_UINT_1 r3, l1, l0
+ SDValue RLOLLO = DAG.getNode(ISD::MULHU,
+ DL,
+ INTTY, RHSLO, LHSLO);
+ // ADD_INT hr, hr, r3
+ SDValue HIGH = DAG.getNode(ISD::ADD,
+ DL,
+ INTTY, ADDHI, RLOLLO);
+ // MULLO_UINT_1 l3, l1, l0
+ SDValue LOW = DAG.getNode(AMDILISD::UMUL,
+ DL,
+ INTTY, LHSLO, RHSLO);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ OVT, LOW, HIGH);
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+ EVT VT = Op.getValueType();
+ //printSDValue(Op, 1);
+ SDValue Nodes1;
+ SDValue second;
+ SDValue third;
+ SDValue fourth;
+ DebugLoc DL = Op.getDebugLoc();
+ Nodes1 = DAG.getNode(AMDILISD::VBUILD,
+ DL,
+ VT, Op.getOperand(0));
+ bool allEqual = true;
+ for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+ if (Op.getOperand(0) != Op.getOperand(x)) {
+ allEqual = false;
+ break;
+ }
+ }
+ if (allEqual) {
+ return Nodes1;
+ }
+ switch(Op.getNumOperands()) {
+ default:
+ case 1:
+ break;
+ case 4:
+ fourth = Op.getOperand(3);
+ if (fourth.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ fourth,
+ DAG.getConstant(7, MVT::i32));
+ }
+ case 3:
+ third = Op.getOperand(2);
+ if (third.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ third,
+ DAG.getConstant(6, MVT::i32));
+ }
+ case 2:
+ second = Op.getOperand(1);
+ if (second.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ second,
+ DAG.getConstant(5, MVT::i32));
+ }
+ break;
+ };
+ return Nodes1;
+}
+
+SDValue
+AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ const SDValue *ptr = NULL;
+ const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ uint32_t swizzleNum = 0;
+ SDValue DST;
+ if (!VT.isVector()) {
+ SDValue Res = Op.getOperand(0);
+ return Res;
+ }
+
+ if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
+ ptr = &Op.getOperand(1);
+ } else {
+ ptr = &Op.getOperand(0);
+ }
+ if (CSDN) {
+ swizzleNum = (uint32_t)CSDN->getZExtValue();
+ uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
+ uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
+ DST = DAG.getNode(AMDILISD::VINSERT,
+ DL,
+ VT,
+ Op.getOperand(0),
+ *ptr,
+ DAG.getTargetConstant(mask2, MVT::i32),
+ DAG.getTargetConstant(mask3, MVT::i32));
+ } else {
+ uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
+ uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
+ SDValue res = DAG.getNode(AMDILISD::VINSERT,
+ DL, VT, Op.getOperand(0), *ptr,
+ DAG.getTargetConstant(mask2, MVT::i32),
+ DAG.getTargetConstant(mask3, MVT::i32));
+ for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
+ mask2 = 0x04030201 & ~(0xFF << (x * 8));
+ mask3 = 0x01010101 & (0xFF << (x * 8));
+ SDValue t = DAG.getNode(AMDILISD::VINSERT,
+ DL, VT, Op.getOperand(0), *ptr,
+ DAG.getTargetConstant(mask2, MVT::i32),
+ DAG.getTargetConstant(mask3, MVT::i32));
+ SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
+ DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+ Op.getOperand(2), DAG.getConstant(x, MVT::i32));
+ c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
+ res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
+ }
+ DST = res;
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ //printSDValue(Op, 1);
+ const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ uint64_t swizzleNum = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Res;
+ if (!Op.getOperand(0).getValueType().isVector()) {
+ Res = Op.getOperand(0);
+ return Res;
+ }
+ if (CSDN) {
+ // Static vector extraction
+ swizzleNum = CSDN->getZExtValue() + 1;
+ Res = DAG.getNode(AMDILISD::VEXTRACT,
+ DL, VT,
+ Op.getOperand(0),
+ DAG.getTargetConstant(swizzleNum, MVT::i32));
+ } else {
+ SDValue Op1 = Op.getOperand(1);
+ uint32_t vecSize = 4;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
+ DL, VT, Op0,
+ DAG.getTargetConstant(1, MVT::i32));
+ if (Op0.getValueType().isVector()) {
+ vecSize = Op0.getValueType().getVectorNumElements();
+ }
+ for (uint32_t x = 2; x <= vecSize; ++x) {
+ SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
+ DL, VT, Op0,
+ DAG.getTargetConstant(x, MVT::i32));
+ SDValue c = DAG.getNode(AMDILISD::CMP,
+ DL, Op1.getValueType(),
+ DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+ Op1, DAG.getConstant(x, MVT::i32));
+ res = DAG.getNode(AMDILISD::CMOVLOG, DL,
+ VT, c, t, res);
+
+ }
+ Res = res;
+ }
+ return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ uint32_t vecSize = Op.getValueType().getVectorNumElements();
+ SDValue src = Op.getOperand(0);
+ const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ uint64_t offset = 0;
+ EVT vecType = Op.getValueType().getVectorElementType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result;
+ if (CSDN) {
+ offset = CSDN->getZExtValue();
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL,vecType, src, DAG.getConstant(offset, MVT::i32));
+ Result = DAG.getNode(AMDILISD::VBUILD, DL,
+ Op.getValueType(), Result);
+ for (uint32_t x = 1; x < vecSize; ++x) {
+ SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+ src, DAG.getConstant(offset + x, MVT::i32));
+ if (elt.getOpcode() != ISD::UNDEF) {
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), Result, elt,
+ DAG.getConstant(x, MVT::i32));
+ }
+ }
+ } else {
+ SDValue idx = Op.getOperand(1);
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, vecType, src, idx);
+ Result = DAG.getNode(AMDILISD::VBUILD, DL,
+ Op.getValueType(), Result);
+ for (uint32_t x = 1; x < vecSize; ++x) {
+ idx = DAG.getNode(ISD::ADD, DL, vecType,
+ idx, DAG.getConstant(1, MVT::i32));
+ SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+ src, idx);
+ if (elt.getOpcode() != ISD::UNDEF) {
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), Result, elt, idx);
+ }
+ }
+ }
+ return Result;
+}
+SDValue
+AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ SDValue Res = DAG.getNode(AMDILISD::VBUILD,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0));
+ return Res;
+}
+SDValue
+AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue andOp;
+ andOp = DAG.getNode(
+ AMDILISD::AND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return andOp;
+}
+SDValue
+AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue orOp;
+ orOp = DAG.getNode(AMDILISD::OR,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return orOp;
+}
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(AMDILISD::CMOVLOG,
+ DL,
+ Op.getValueType(), Cond, LHS, RHS);
+ return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TRUE = Op.getOperand(2);
+ SDValue FALSE = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ DebugLoc DL = Op.getDebugLoc();
+ bool skipCMov = false;
+ bool genINot = false;
+ EVT OVT = Op.getValueType();
+
+ // Check for possible elimination of cmov
+ if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
+ const ConstantSDNode *trueConst
+ = dyn_cast<ConstantSDNode>( TRUE.getNode() );
+ const ConstantSDNode *falseConst
+ = dyn_cast<ConstantSDNode>( FALSE.getNode() );
+ if (trueConst && falseConst) {
+ // both possible result values are constants
+ if (trueConst->isAllOnesValue()
+ && falseConst->isNullValue()) { // and convenient constants
+ skipCMov = true;
+ }
+ else if (trueConst->isNullValue()
+ && falseConst->isAllOnesValue()) { // less convenient
+ skipCMov = true;
+ genINot = true;
+ }
+ }
+ }
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ unsigned int AMDILCC = CondCCodeToCC(
+ SetCCOpcode,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+ Cond = DAG.getNode(
+ AMDILISD::CMP,
+ DL,
+ LHS.getValueType(),
+ DAG.getConstant(AMDILCC, MVT::i32),
+ LHS,
+ RHS);
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ if (genINot) {
+ Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
+ }
+ if (!skipCMov) {
+ Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
+ }
+ return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ unsigned int AMDILCC = CondCCodeToCC(
+ SetCCOpcode,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+ Cond = DAG.getNode(
+ AMDILISD::CMP,
+ DL,
+ LHS.getValueType(),
+ DAG.getConstant(AMDILCC, MVT::i32),
+ LHS,
+ RHS);
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(
+ ISD::AND,
+ DL,
+ Cond.getValueType(),
+ DAG.getConstant(1, Cond.getValueType()),
+ Cond);
+ return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+ // If the op is less than 32 bits, then it needs to extend to 32bits
+ // so it can properly keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Src = Op.getOperand(0);
+ SDValue Dst = Op;
+ SDValue Res;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Dst.getValueType();
+ // Lets bitcast the floating point types to an
+ // equivalent integer type before converting to vectors.
+ if (SrcVT.getScalarType().isFloatingPoint()) {
+ Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
+ SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
+ SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
+ Src);
+ SrcVT = Src.getValueType();
+ }
+ uint32_t ScalarSrcSize = SrcVT.getScalarType()
+ .getSimpleVT().getSizeInBits();
+ uint32_t ScalarDstSize = DstVT.getScalarType()
+ .getSimpleVT().getSizeInBits();
+ uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
+ bool isVec = SrcVT.isVector();
+ if (DstVT.getScalarType().isInteger() &&
+ (SrcVT.getScalarType().isInteger()
+ || SrcVT.getScalarType().isFloatingPoint())) {
+ if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
+ || (ScalarSrcSize == 64
+ && DstNumEle == 4
+ && ScalarDstSize == 16)) {
+ // This is the problematic case when bitcasting i64 <-> <4 x i16>
+ // This approach is a little different as we cannot generate a
+ // <4 x i64> vector
+ // as that is illegal in our backend and we are already past
+ // the DAG legalizer.
+ // So, in this case, we will do the following conversion.
+ // Case 1:
+ // %dst = <4 x i16> %src bitconvert i64 ==>
+ // %tmp = <4 x i16> %src convert <4 x i32>
+ // %tmp = <4 x i32> %tmp and 0xFFFF
+ // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
+ // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
+ // %dst = <2 x i32> %tmp bitcast i64
+ // case 2:
+ // %dst = i64 %src bitconvert <4 x i16> ==>
+ // %tmp = i64 %src bitcast <2 x i32>
+ // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
+ // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
+ // %tmp = <4 x i32> %tmp and 0xFFFF
+ // %dst = <4 x i16> %tmp bitcast <4 x i32>
+ SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
+ DAG.getConstant(0xFFFF, MVT::i32));
+ SDValue const16 = DAG.getConstant(16, MVT::i32);
+ if (ScalarDstSize == 64) {
+ // case 1
+ Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
+ Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
+ SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(0, MVT::i32));
+ SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(1, MVT::i32));
+ y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
+ SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(2, MVT::i32));
+ SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(3, MVT::i32));
+ w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
+ x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
+ y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
+ Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
+ return Res;
+ } else {
+ // case 2
+ SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
+ SDValue lor16
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
+ SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
+ SDValue hir16
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
+ SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
+ MVT::v4i32, lo);
+ SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(1, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, lor16, idxVal);
+ idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(2, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, hi, idxVal);
+ idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(3, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, hir16, idxVal);
+ resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
+ Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
+ return Res;
+ }
+ } else {
+ // There are four cases we need to worry about for bitcasts
+ // where the size of all
+ // source, intermediates and result is <= 128 bits, unlike
+ // the above case
+ // 1) Sub32bit bitcast 32bitAlign
+ // %dst = <4 x i8> bitcast i32
+ // (also <[2|4] x i16> to <[2|4] x i32>)
+ // 2) 32bitAlign bitcast Sub32bit
+ // %dst = i32 bitcast <4 x i8>
+ // 3) Sub32bit bitcast LargerSub32bit
+ // %dst = <2 x i8> bitcast i16
+ // (also <4 x i8> to <2 x i16>)
+ // 4) Sub32bit bitcast SmallerSub32bit
+ // %dst = i16 bitcast <2 x i8>
+ // (also <2 x i16> to <4 x i8>)
+ // This also only handles types that are powers of two
+ if ((ScalarDstSize & (ScalarDstSize - 1))
+ || (ScalarSrcSize & (ScalarSrcSize - 1))) {
+ } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
+ // case 1:
+ EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
+#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
+ SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
+#else
+ SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getUNDEF(IntTy.getScalarType()));
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src,
+ DAG.getConstant(x, MVT::i32));
+ temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
+ res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
+ res, temp, idx);
+ }
+#endif
+ SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
+ SDValue *newEle = new SDValue[SrcNumEle];
+ res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntTy.getScalarType(), res,
+ DAG.getConstant(x, MVT::i32));
+ }
+ uint32_t Ratio = SrcNumEle / DstNumEle;
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ if (x % Ratio) {
+ newEle[x] = DAG.getNode(ISD::SHL, DL,
+ IntTy.getScalarType(), newEle[x],
+ DAG.getConstant(ScalarSrcSize * (x % Ratio),
+ MVT::i32));
+ }
+ }
+ for (uint32_t x = 0; x < SrcNumEle; x += 2) {
+ newEle[x] = DAG.getNode(ISD::OR, DL,
+ IntTy.getScalarType(), newEle[x], newEle[x + 1]);
+ }
+ if (ScalarSrcSize == 8) {
+ for (uint32_t x = 0; x < SrcNumEle; x += 4) {
+ newEle[x] = DAG.getNode(ISD::OR, DL,
+ IntTy.getScalarType(), newEle[x], newEle[x + 2]);
+ }
+ if (DstNumEle == 1) {
+ Dst = newEle[0];
+ } else {
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+ newEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ DstVT, Dst, newEle[x * 4], idx);
+ }
+ }
+ } else {
+ if (DstNumEle == 1) {
+ Dst = newEle[0];
+ } else {
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+ newEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ DstVT, Dst, newEle[x * 2], idx);
+ }
+ }
+ }
+ delete [] newEle;
+ return Dst;
+ } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
+ // case 2:
+ EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
+ SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getUNDEF(IntTy.getScalarType()));
+ uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ for (uint32_t y = 0; y < mult; ++y) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(),
+ DAG.getConstant(x * mult + y, MVT::i32));
+ SDValue t;
+ if (SrcNumEle > 1) {
+ t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, SrcVT.getScalarType(), Src,
+ DAG.getConstant(x, MVT::i32));
+ } else {
+ t = Src;
+ }
+ if (y != 0) {
+ t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
+ t, DAG.getConstant(y * ScalarDstSize,
+ MVT::i32));
+ }
+ vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ DL, IntTy, vec, t, idx);
+ }
+ }
+ Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
+ return Dst;
+ } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
+ // case 3:
+ SDValue *numEle = new SDValue[SrcNumEle];
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ MVT::i8, Src, DAG.getConstant(x, MVT::i32));
+ numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
+ numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
+ DAG.getConstant(0xFF, MVT::i16));
+ }
+ for (uint32_t x = 1; x < SrcNumEle; x += 2) {
+ numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
+ DAG.getConstant(8, MVT::i16));
+ numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
+ numEle[x-1], numEle[x]);
+ }
+ if (DstNumEle > 1) {
+ // If we are not a scalar i16, the only other case is a
+ // v2i16 since we can't have v8i8 at this point, v4i16
+ // cannot be generated
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
+ numEle[0]);
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(1, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
+ Dst, numEle[2], idx);
+ } else {
+ Dst = numEle[0];
+ }
+ delete [] numEle;
+ return Dst;
+ } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
+ // case 4:
+ SDValue *numEle = new SDValue[DstNumEle];
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ MVT::i16, Src, DAG.getConstant(x, MVT::i32));
+ numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
+ numEle[x * 2], DAG.getConstant(8, MVT::i16));
+ }
+ MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
+ Dst, numEle[x], idx);
+ }
+ delete [] numEle;
+ ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
+ Res = DAG.getSExtOrTrunc(Dst, DL, ty);
+ return Res;
+ }
+ }
+ }
+ Res = DAG.getNode(AMDILISD::BITCONV,
+ Dst.getDebugLoc(),
+ Dst.getValueType(), Src);
+ return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ unsigned int SPReg = AMDIL::SP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue SP = DAG.getCopyFromReg(Chain,
+ DL,
+ SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::ADD,
+ DL,
+ MVT::i32, SP, Size);
+ Chain = DAG.getCopyToReg(SP.getValue(1),
+ DL,
+ SPReg, NewSP);
+ SDValue Ops[2] = {NewSP, Chain};
+ Chain = DAG.getMergeValues(Ops, 2 ,DL);
+ return Chain;
+}
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ ISD::CondCode CC = CCNode->get();
+ SDValue Result;
+ unsigned int cmpOpcode = CondCCodeToCC(
+ CC,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ CmpValue = DAG.getNode(
+ AMDILISD::CMP,
+ Op.getDebugLoc(),
+ LHS.getValueType(),
+ DAG.getConstant(cmpOpcode, MVT::i32),
+ LHS, RHS);
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Result = DAG.getNode(
+ AMDILISD::DP_TO_FP,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Result = DAG.getNode(
+ AMDILISD::VCONCAT,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return Result;
+}
+// LowerRET - Lower an ISD::RET node.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG)
+const
+{
+ //MachineFunction& MF = DAG.getMachineFunction();
+ // CCValAssign - represent the assignment of the return value
+ // to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+ }
+ }
+ // FIXME: implement this when tail call is implemented
+ // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+ // both x86 and ppc implement this in ISelLowering
+
+ // Regular return here
+ SDValue Flag;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain);
+ RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ValToCopy = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ // ISD::Ret => ret chain, (regnum1, val1), ...
+ // So i * 2 + 1 index only the regnums
+ Chain = DAG.getCopyToReg(Chain,
+ dl,
+ VA.getLocReg(),
+ ValToCopy,
+ Flag);
+ // guarantee that all emitted copies are stuck together
+ // avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+ /*if (MF.getFunction()->hasStructRetAttr()) {
+ assert(0 && "Struct returns are not yet implemented!");
+ // Both MIPS and X86 have this
+ }*/
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ Flag = DAG.getNode(AMDILISD::RET_FLAG,
+ dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+ return Flag;
+}
+void
+AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
+ unsigned int opCode) const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ unsigned int opi32Code = 0, si32Code = 0;
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ uint32_t REGS[12];
+ // All the relationals can be generated with with 6 temp registers
+ for (int x = 0; x < 12; ++x) {
+ REGS[x] = genVReg(simpleVT);
+ }
+ // Pull out the high and low components of each 64 bit register
+ generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
+ generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
+ generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
+ generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
+ // Determine the correct opcode that we should use
+ switch(opCode) {
+ default:
+ assert(!"comparison case not handled!");
+ break;
+ case AMDIL::LEQ:
+ si32Code = opi32Code = AMDIL::IEQ;
+ break;
+ case AMDIL::LNE:
+ si32Code = opi32Code = AMDIL::INE;
+ break;
+ case AMDIL::LLE:
+ case AMDIL::ULLE:
+ case AMDIL::LGE:
+ case AMDIL::ULGE:
+ if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
+ std::swap(REGS[0], REGS[2]);
+ } else {
+ std::swap(REGS[1], REGS[3]);
+ }
+ if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
+ opi32Code = AMDIL::ILT;
+ } else {
+ opi32Code = AMDIL::ULT;
+ }
+ si32Code = AMDIL::UGE;
+ break;
+ case AMDIL::LGT:
+ case AMDIL::ULGT:
+ std::swap(REGS[0], REGS[2]);
+ std::swap(REGS[1], REGS[3]);
+ case AMDIL::LLT:
+ case AMDIL::ULLT:
+ if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
+ opi32Code = AMDIL::ILT;
+ } else {
+ opi32Code = AMDIL::ULT;
+ }
+ si32Code = AMDIL::ULT;
+ break;
+ };
+ // Do the initial opcode on the high and low components.
+ // This leaves the following:
+ // REGS[4] = L_HI OP R_HI
+ // REGS[5] = L_LO OP R_LO
+ generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
+ generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
+ switch(opi32Code) {
+ case AMDIL::IEQ:
+ case AMDIL::INE:
+ {
+ // combine the results with an and or or depending on if
+ // we are eq or ne
+ uint32_t combineOp = (opi32Code == AMDIL::IEQ)
+ ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
+ generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
+ }
+ break;
+ default:
+ // this finishes codegen for the following pattern
+ // REGS[4] || (REGS[5] && (L_HI == R_HI))
+ generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
+ generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
+ REGS[9]);
+ generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
+ REGS[10]);
+ break;
+ }
+ generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
+}
+
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+ return 0;
+}
+
+void
+AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator &BBI,
+ DebugLoc *DL, const TargetInstrInfo *TII) const
+{
+ mBB = BB;
+ mBBI = BBI;
+ mDL = DL;
+ mTII = TII;
+}
+uint32_t
+AMDILTargetLowering::genVReg(uint32_t regType) const
+{
+ return mBB->getParent()->getRegInfo().createVirtualRegister(
+ getRegClassFromID(regType));
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
+{
+ return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+ uint32_t src1) const
+{
+ return generateMachineInst(opcode, dst).addReg(src1);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+ uint32_t src1, uint32_t src2) const
+{
+ return generateMachineInst(opcode, dst, src1).addReg(src2);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+ uint32_t src1, uint32_t src2, uint32_t src3) const
+{
+ return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
+}
+
+
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib, (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv;
+ if (INTTY == MVT::i32) {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ } else {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ }
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r0, DAG.getConstant(0, OVT));
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r1, DAG.getConstant(0, OVT));
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+
+ // The LowerUDIV24 function implements the following CL.
+ // int ia = (int)LHS
+ // float fa = (float)ia
+ // int ib = (int)RHS
+ // float fb = (float)ib
+ // float fq = native_divide(fa, fb)
+ // fq = trunc(fq)
+ // float t = mad(fq, fb, fb)
+ // int iq = (int)fq - (t <= fa)
+ // return (type)iq
+
+ // int ia = (int)LHS
+ SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
+
+ // float fa = (float)ia
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // int ib = (int)RHS
+ SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
+
+ // float fb = (float)ib
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb)
+ SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq)
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float t = mad(fq, fb, fb)
+ SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
+
+ // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
+ SDValue iq;
+ fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+ if (INTTY == MVT::i32) {
+ iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+ } else {
+ iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+ }
+ iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
+
+
+ // return (type)iq
+ iq = DAG.getZExtOrTrunc(iq, DL, OVT);
+ return iq;
+
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r0, DAG.getConstant(0, OVT));
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r1, DAG.getConstant(0, OVT));
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerUREM8 function generates equivalent to the following IL.
+ // mov r0, as_u32(LHS)
+ // mov r1, as_u32(RHS)
+ // and r10, r0, 0xFF
+ // and r11, r1, 0xFF
+ // cmov_logical r3, r11, r11, 0x1
+ // udiv r3, r10, r3
+ // cmov_logical r3, r11, r3, 0
+ // umul r3, r3, r11
+ // sub r3, r10, r3
+ // and as_u8(DST), r3, 0xFF
+
+ // mov r0, as_u32(LHS)
+ SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // mov r1, as_u32(RHS)
+ SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // and r10, r0, 0xFF
+ SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
+ DAG.getConstant(0xFF, INTTY));
+
+ // and r11, r1, 0xFF
+ SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
+ DAG.getConstant(0xFF, INTTY));
+
+ // cmov_logical r3, r11, r11, 0x1
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
+ DAG.getConstant(0x01, INTTY));
+
+ // udiv r3, r10, r3
+ r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
+
+ // cmov_logical r3, r11, r3, 0
+ r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
+ DAG.getConstant(0, INTTY));
+
+ // umul r3, r3, r11
+ r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
+
+ // sub r3, r10, r3
+ r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
+
+ // and as_u8(DST), r3, 0xFF
+ SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
+ DAG.getConstant(0xFF, INTTY));
+ DST = DAG.getZExtOrTrunc(DST, DL, OVT);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerUREM16 function generatest equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // DIV = LowerUDIV16(LHS, RHS)
+ // and r10, r0, 0xFFFF
+ // and r11, r1, 0xFFFF
+ // cmov_logical r3, r11, r11, 0x1
+ // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+ // and r3, r3, 0xFFFF
+ // cmov_logical r3, r11, r3, 0
+ // umul r3, r3, r11
+ // sub r3, r10, r3
+ // and DST, r3, 0xFFFF
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // and r10, r0, 0xFFFF
+ SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
+ DAG.getConstant(0xFFFF, OVT));
+
+ // and r11, r1, 0xFFFF
+ SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
+ DAG.getConstant(0xFFFF, OVT));
+
+ // cmov_logical r3, r11, r11, 0x1
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
+ DAG.getConstant(0x01, OVT));
+
+ // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+ r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
+ r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
+ r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
+ r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
+ r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
+
+ // and r3, r3, 0xFFFF
+ r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
+ DAG.getConstant(0xFFFF, OVT));
+
+ // cmov_logical r3, r11, r3, 0
+ r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
+ DAG.getConstant(0, OVT));
+ // umul r3, r3, r11
+ r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
+
+ // sub r3, r10, r3
+ r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
+
+ // and DST, r3, 0xFFFF
+ SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
+ DAG.getConstant(0xFFFF, OVT));
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerUREM32 function generates equivalent to the following IL.
+ // udiv r20, LHS, RHS
+ // umul r20, r20, RHS
+ // sub DST, LHS, r20
+
+ // udiv r20, LHS, RHS
+ SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
+
+ // umul r20, r20, RHS
+ r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
+
+ // sub DST, LHS, r20
+ SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+
+SDValue
+AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2f32) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4f32) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue DST;
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // TODO: This doesn't work for vector types yet
+ // The LowerFDIV32 function generates equivalent to the following
+ // IL:
+ // mov r20, as_int(LHS)
+ // mov r21, as_int(RHS)
+ // and r30, r20, 0x7f800000
+ // and r31, r20, 0x807FFFFF
+ // and r32, r21, 0x7f800000
+ // and r33, r21, 0x807FFFFF
+ // ieq r40, r30, 0x7F800000
+ // ieq r41, r31, 0x7F800000
+ // ieq r42, r32, 0
+ // ieq r43, r33, 0
+ // and r50, r20, 0x80000000
+ // and r51, r21, 0x80000000
+ // ior r32, r32, 0x3f800000
+ // ior r33, r33, 0x3f800000
+ // cmov_logical r32, r42, r50, r32
+ // cmov_logical r33, r43, r51, r33
+ // cmov_logical r32, r40, r20, r32
+ // cmov_logical r33, r41, r21, r33
+ // ior r50, r40, r41
+ // ior r51, r42, r43
+ // ior r50, r50, r51
+ // inegate r52, r31
+ // iadd r30, r30, r52
+ // cmov_logical r30, r50, 0, r30
+ // div_zeroop(infinity) r21, 1.0, r33
+ // mul_ieee r20, r32, r21
+ // and r22, r20, 0x7FFFFFFF
+ // and r23, r20, 0x80000000
+ // ishr r60, r22, 0x00000017
+ // ishr r61, r30, 0x00000017
+ // iadd r20, r20, r30
+ // iadd r21, r22, r30
+ // iadd r60, r60, r61
+ // ige r42, 0, R60
+ // ior r41, r23, 0x7F800000
+ // ige r40, r60, 0x000000FF
+ // cmov_logical r40, r50, 0, r40
+ // cmov_logical r20, r42, r23, r20
+ // cmov_logical DST, r40, r41, r20
+ // as_float(DST)
+
+ // mov r20, as_int(LHS)
+ SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
+
+ // mov r21, as_int(RHS)
+ SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
+
+ // and r30, r20, 0x7f800000
+ SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x7F800000, INTTY));
+
+ // and r31, r21, 0x7f800000
+ SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x7f800000, INTTY));
+
+ // and r32, r20, 0x807FFFFF
+ SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x807FFFFF, INTTY));
+
+ // and r33, r21, 0x807FFFFF
+ SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x807FFFFF, INTTY));
+
+ // ieq r40, r30, 0x7F800000
+ SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R30, DAG.getConstant(0x7F800000, INTTY));
+
+ // ieq r41, r31, 0x7F800000
+ SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R31, DAG.getConstant(0x7F800000, INTTY));
+
+ // ieq r42, r30, 0
+ SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R30, DAG.getConstant(0, INTTY));
+
+ // ieq r43, r31, 0
+ SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R31, DAG.getConstant(0, INTTY));
+
+ // and r50, r20, 0x80000000
+ SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // and r51, r21, 0x80000000
+ SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // ior r32, r32, 0x3f800000
+ R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
+ DAG.getConstant(0x3F800000, INTTY));
+
+ // ior r33, r33, 0x3f800000
+ R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
+ DAG.getConstant(0x3F800000, INTTY));
+
+ // cmov_logical r32, r42, r50, r32
+ R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
+
+ // cmov_logical r33, r43, r51, r33
+ R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
+
+ // cmov_logical r32, r40, r20, r32
+ R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
+
+ // cmov_logical r33, r41, r21, r33
+ R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
+
+ // ior r50, r40, r41
+ R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
+
+ // ior r51, r42, r43
+ R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
+
+ // ior r50, r50, r51
+ R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
+
+ // inegate r52, r31
+ SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
+
+ // iadd r30, r30, r52
+ R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
+
+ // cmov_logical r30, r50, 0, r30
+ R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+ DAG.getConstant(0, INTTY), R30);
+
+ // div_zeroop(infinity) r21, 1.0, as_float(r33)
+ R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+ R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ DAG.getConstantFP(1.0f, OVT), R33);
+
+ // mul_ieee as_int(r20), as_float(r32), r21
+ R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+ R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+ R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+ // div_zeroop(infinity) r21, 1.0, as_float(r33)
+ R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+ R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ DAG.getConstantFP(1.0f, OVT), R33);
+
+ // mul_ieee as_int(r20), as_float(r32), r21
+ R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+ R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+ R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+ // and r22, r20, 0x7FFFFFFF
+ SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x7FFFFFFF, INTTY));
+
+ // and r23, r20, 0x80000000
+ SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // ishr r60, r22, 0x00000017
+ SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
+ DAG.getConstant(0x00000017, INTTY));
+
+ // ishr r61, r30, 0x00000017
+ SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
+ DAG.getConstant(0x00000017, INTTY));
+
+ // iadd r20, r20, r30
+ R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
+
+ // iadd r21, r22, r30
+ R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
+
+ // iadd r60, r60, r61
+ R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
+
+ // ige r42, 0, R60
+ R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ DAG.getConstant(0, INTTY),
+ R60);
+
+ // ior r41, r23, 0x7F800000
+ R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
+ DAG.getConstant(0x7F800000, INTTY));
+
+ // ige r40, r60, 0x000000FF
+ R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ R60,
+ DAG.getConstant(0x0000000FF, INTTY));
+
+ // cmov_logical r40, r50, 0, r40
+ R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+ DAG.getConstant(0, INTTY),
+ R40);
+
+ // cmov_logical r20, r42, r23, r20
+ R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
+
+ // cmov_logical DST, r40, r41, r20
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
+
+ // as_float(DST)
+ DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
+ } else {
+ // The following sequence of DAG nodes produce the following IL:
+ // fabs r1, RHS
+ // lt r2, 0x1.0p+96f, r1
+ // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
+ // mul_ieee r1, RHS, r3
+ // div_zeroop(infinity) r0, LHS, r1
+ // mul_ieee DST, r0, r3
+
+ // fabs r1, RHS
+ SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
+ // lt r2, 0x1.0p+96f, r1
+ SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
+ DAG.getConstant(0x6f800000, INTTY), r1);
+ // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
+ DAG.getConstant(0x2f800000, INTTY),
+ DAG.getConstant(0x3f800000, INTTY));
+ // mul_ieee r1, RHS, r3
+ r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
+ // div_zeroop(infinity) r0, LHS, r1
+ SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
+ // mul_ieee DST, r0, r3
+ DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}