diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeon/AMDILISelLowering.cpp | 1609 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILISelLowering.h | 70 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600ISelLowering.cpp | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.cpp | 3 |
4 files changed, 0 insertions, 1687 deletions
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index a0be1f45f41..892aaf4a3c1 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -551,42 +551,12 @@ AMDILTargetLowering::LowerMemArgument( // These are the current register classes that are // supported - addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass); - addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass); - - if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) { - addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass); - addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass); - } - if (STM.device()->isSupported(AMDILDeviceInfo::ByteOps)) { - addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass); - addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass); - addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass); - setOperationAction(ISD::Constant , MVT::i8 , Legal); - } - if (STM.device()->isSupported(AMDILDeviceInfo::ShortOps)) { - addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass); - addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass); - addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass); - setOperationAction(ISD::Constant , MVT::i16 , Legal); - } - addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass); - addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass); - addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass); - addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass); - if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) { - addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass); - addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass); - } - for (unsigned int x = 0; x < numTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types // We cannot sextinreg, expand to shifts setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::FP_ROUND, VT, Expand); setOperationAction(ISD::SUBE, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); @@ -597,29 +567,22 @@ AMDILTargetLowering::LowerMemArgument( setOperationAction(ISD::BR_JT, VT, Expand); setOperationAction(ISD::BRIND, VT, Expand); // TODO: Implement custom UREM/SREM routines - setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UINT_TO_FP, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); setOperationAction(ISD::GlobalAddress, VT, Custom); setOperationAction(ISD::JumpTable, VT, Custom); setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::SELECT_CC, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); if (VT != MVT::i64 && VT != MVT::v2i64) { setOperationAction(ISD::SDIV, VT, Custom); } - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } for (unsigned int x = 0; x < numFloatTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; // IL does not have these operations for floating point types setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); - setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::SETOLT, VT, Expand); setOperationAction(ISD::SETOGE, VT, Expand); setOperationAction(ISD::SETOGT, VT, Expand); @@ -635,8 +598,6 @@ AMDILTargetLowering::LowerMemArgument( // GPU also does not have divrem function for signed or unsigned setOperationAction(ISD::SDIVREM, VT, Expand); - setOperationAction(ISD::UDIVREM, VT, Expand); - setOperationAction(ISD::FP_ROUND, VT, Expand); // GPU does not have [S|U]MUL_LOHI functions as a single instruction setOperationAction(ISD::SMUL_LOHI, VT, Expand); @@ -658,13 +619,8 @@ AMDILTargetLowering::LowerMemArgument( MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::FP_ROUND, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); - setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); // setOperationAction(ISD::VSETCC, VT, Expand); setOperationAction(ISD::SETCC, VT, Expand); @@ -672,26 +628,15 @@ AMDILTargetLowering::LowerMemArgument( setOperationAction(ISD::SELECT, VT, Expand); } - setOperationAction(ISD::FP_ROUND, MVT::Other, Expand); if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) { - if (STM.calVersion() < CAL_VERSION_SC_139 - || STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - setOperationAction(ISD::MUL, MVT::i64, Custom); - } - setOperationAction(ISD::SUB, MVT::i64, Custom); - setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::v2i64, Expand); setOperationAction(ISD::MULHS, MVT::i64, Expand); setOperationAction(ISD::MULHS, MVT::v2i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); - setOperationAction(ISD::SUB, MVT::v2i64, Expand); setOperationAction(ISD::ADD, MVT::v2i64, Expand); setOperationAction(ISD::SREM, MVT::v2i64, Expand); setOperationAction(ISD::Constant , MVT::i64 , Legal); setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand); setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); @@ -702,15 +647,11 @@ AMDILTargetLowering::LowerMemArgument( setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand); setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); - setOperationAction(ISD::FDIV, MVT::v2f64, Expand); // We want to expand vector conversions into their scalar // counterparts. - setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand); setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); @@ -735,9 +676,6 @@ AMDILTargetLowering::LowerMemArgument( setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::SETCC, MVT::Other, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); - setOperationAction(ISD::FDIV, MVT::f32, Custom); - setOperationAction(ISD::FDIV, MVT::v2f32, Custom); - setOperationAction(ISD::FDIV, MVT::v4f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); // Use the default implementation. @@ -1455,27 +1393,15 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const LOWER(JumpTable); LOWER(ConstantPool); LOWER(ExternalSymbol); - LOWER(FP_TO_UINT); - LOWER(UINT_TO_FP); - LOWER(MUL); - LOWER(SUB); - LOWER(FDIV); LOWER(SDIV); LOWER(SREM); - LOWER(UREM); LOWER(BUILD_VECTOR); - LOWER(INSERT_VECTOR_ELT); - LOWER(EXTRACT_VECTOR_ELT); - LOWER(EXTRACT_SUBVECTOR); - LOWER(SCALAR_TO_VECTOR); - LOWER(CONCAT_VECTORS); LOWER(SELECT); LOWER(SETCC); LOWER(SIGN_EXTEND_INREG); LOWER(DYNAMIC_STACKALLOC); LOWER(BRCOND); LOWER(BR_CC); - LOWER(FP_ROUND); } return Op; } @@ -1869,856 +1795,6 @@ const } SDValue -AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG, - uint32_t bits) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT INTTY = Op.getValueType(); - EVT FPTY; - if (INTTY.isVector()) { - FPTY = EVT(MVT::getVectorVT(MVT::f32, - INTTY.getVectorNumElements())); - } else { - FPTY = EVT(MVT::f32); - } - /* static inline uint - __clz_Nbit(uint x) - { - int xor = 0x3f800000U | x; - float tp = as_float(xor); - float t = tp + -1.0f; - uint tint = as_uint(t); - int cmp = (x != 0); - uint tsrc = tint >> 23; - uint tmask = tsrc & 0xffU; - uint cst = (103 + N)U - tmask; - return cmp ? cst : N; - } - */ - assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32 - && "genCLZu16 only works on 32bit types"); - // uint x = Op - SDValue x = Op; - // xornode = 0x3f800000 | x - SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY, - DAG.getConstant(0x3f800000, INTTY), x); - // float tp = as_float(xornode) - SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode); - // float t = tp + -1.0f - SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp, - DAG.getConstantFP(-1.0f, FPTY)); - // uint tint = as_uint(t) - SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t); - // int cmp = (x != 0) - SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x, - DAG.getConstant(0, INTTY)); - // uint tsrc = tint >> 23 - SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint, - DAG.getConstant(23, INTTY)); - // uint tmask = tsrc & 0xFF - SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc, - DAG.getConstant(0xFFU, INTTY)); - // uint cst = (103 + bits) - tmask - SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY, - DAG.getConstant((103U + bits), INTTY), tmask); - // return cmp ? cst : N - cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst, - DAG.getConstant(bits, INTTY)); - return cst; -} - -SDValue -AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const -{ - SDValue DST = SDValue(); - DebugLoc DL = Op.getDebugLoc(); - EVT INTTY = Op.getValueType(); - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { - //__clz_32bit(uint u) - //{ - // int z = __amdil_ffb_hi(u) ; - // return z < 0 ? 32 : z; - // } - // uint u = op - SDValue u = Op; - // int z = __amdil_ffb_hi(u) - SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u); - // int cmp = z < 0 - SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), - z, DAG.getConstant(0, INTTY)); - // return cmp ? 32 : z - DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, - DAG.getConstant(32, INTTY), z); - } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - // static inline uint - //__clz_32bit(uint x) - //{ - // uint zh = __clz_16bit(x >> 16); - // uint zl = __clz_16bit(x & 0xffffU); - // return zh == 16U ? 16U + zl : zh; - //} - // uint x = Op - SDValue x = Op; - // uint xs16 = x >> 16 - SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x, - DAG.getConstant(16, INTTY)); - // uint zh = __clz_16bit(xs16) - SDValue zh = genCLZuN(xs16, DAG, 16); - // uint xa16 = x & 0xFFFF - SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x, - DAG.getConstant(0xFFFFU, INTTY)); - // uint zl = __clz_16bit(xa16) - SDValue zl = genCLZuN(xa16, DAG, 16); - // uint cmp = zh == 16U - SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - zh, DAG.getConstant(16U, INTTY)); - // uint zl16 = zl + 16 - SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY, - DAG.getConstant(16, INTTY), zl); - // return cmp ? zl16 : zh - DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, - cmp, zl16, zh); - } else { - assert(0 && "Attempting to generate a CLZ function with an" - " unknown graphics card"); - } - return DST; -} -SDValue -AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const -{ - SDValue DST = SDValue(); - DebugLoc DL = Op.getDebugLoc(); - EVT INTTY; - EVT LONGTY = Op.getValueType(); - bool isVec = LONGTY.isVector(); - if (isVec) { - INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType() - .getVectorNumElements())); - } else { - INTTY = EVT(MVT::i32); - } - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { - // Evergreen: - // static inline uint - // __clz_u64(ulong x) - // { - //uint zhi = __clz_32bit((uint)(x >> 32)); - //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL)); - //return zhi == 32U ? 32U + zlo : zhi; - //} - //ulong x = op - SDValue x = Op; - // uint xhi = x >> 32 - SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); - // uint xlo = x & 0xFFFFFFFF - SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x); - // uint zhi = __clz_32bit(xhi) - SDValue zhi = genCLZu32(xhi, DAG); - // uint zlo = __clz_32bit(xlo) - SDValue zlo = genCLZu32(xlo, DAG); - // uint cmp = zhi == 32 - SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - zhi, DAG.getConstant(32U, INTTY)); - // uint zlop32 = 32 + zlo - SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY, - DAG.getConstant(32U, INTTY), zlo); - // return cmp ? zlop32: zhi - DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi); - } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - // HD4XXX: - // static inline uint - //__clz_64bit(ulong x) - //{ - //uint zh = __clz_23bit((uint)(x >> 46)) - 5U; - //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU); - //uint zl = __clz_23bit((uint)x & 0x7fffffU); - //uint r = zh == 18U ? 18U + zm : zh; - //return zh + zm == 41U ? 41U + zl : r; - //} - //ulong x = Op - SDValue x = Op; - // ulong xs46 = x >> 46 - SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x, - DAG.getConstant(46, LONGTY)); - // uint ixs46 = (uint)xs46 - SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46); - // ulong xs23 = x >> 23 - SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x, - DAG.getConstant(23, LONGTY)); - // uint ixs23 = (uint)xs23 - SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23); - // uint xs23m23 = ixs23 & 0x7FFFFF - SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23, - DAG.getConstant(0x7fffffU, INTTY)); - // uint ix = (uint)x - SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); - // uint xm23 = ix & 0x7FFFFF - SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix, - DAG.getConstant(0x7fffffU, INTTY)); - // uint zh = __clz_23bit(ixs46) - SDValue zh = genCLZuN(ixs46, DAG, 23); - // uint zm = __clz_23bit(xs23m23) - SDValue zm = genCLZuN(xs23m23, DAG, 23); - // uint zl = __clz_23bit(xm23) - SDValue zl = genCLZuN(xm23, DAG, 23); - // uint zhm5 = zh - 5 - SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh, - DAG.getConstant(-5U, INTTY)); - SDValue const18 = DAG.getConstant(18, INTTY); - SDValue const41 = DAG.getConstant(41, INTTY); - // uint cmp1 = zh = 18 - SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - zhm5, const18); - // uint zhm5zm = zhm5 + zh - SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm); - // uint cmp2 = zhm5zm == 41 - SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - zhm5zm, const41); - // uint zmp18 = zhm5 + 18 - SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18); - // uint zlp41 = zl + 41 - SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41); - // uint r = cmp1 ? zmp18 : zh - SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, - cmp1, zmp18, zhm5); - // return cmp2 ? zlp41 : r - DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r); - } else { - assert(0 && "Attempting to generate a CLZ function with an" - " unknown graphics card"); - } - return DST; -} -SDValue -AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG, - bool includeSign) const -{ - EVT INTVT; - EVT LONGVT; - SDValue DST; - DebugLoc DL = RHS.getDebugLoc(); - EVT RHSVT = RHS.getValueType(); - bool isVec = RHSVT.isVector(); - if (isVec) { - LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT - .getVectorNumElements())); - INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT - .getVectorNumElements())); - } else { - LONGVT = EVT(MVT::i64); - INTVT = EVT(MVT::i32); - } - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - // unsigned version: - // uint uhi = (uint)(d * 0x1.0p-32); - // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d)); - // return as_ulong2((uint2)(ulo, uhi)); - // - // signed version: - // double ad = fabs(d); - // long l = unsigned_version(ad); - // long nl = -l; - // return d == ad ? l : nl; - SDValue d = RHS; - if (includeSign) { - d = DAG.getNode(ISD::FABS, DL, RHSVT, d); - } - SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d, - DAG.getConstantFP(0x2f800000, RHSVT)); - SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid); - SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi); - ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod, - DAG.getConstantFP(0xcf800000, RHSVT), d); - SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod); - SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi); - if (includeSign) { - SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l); - SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32), - RHS, d); - l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl); - } - DST = l; - } else { - /* - __attribute__((always_inline)) long - cast_f64_to_i64(double d) - { - // Convert d in to 32-bit components - long x = as_long(d); - xhi = LCOMPHI(x); - xlo = LCOMPLO(x); - - // Generate 'normalized' mantissa - mhi = xhi | 0x00100000; // hidden bit - mhi <<= 11; - temp = xlo >> (32 - 11); - mhi |= temp - mlo = xlo << 11; - - // Compute shift right count from exponent - e = (xhi >> (52-32)) & 0x7ff; - sr = 1023 + 63 - e; - srge64 = sr >= 64; - srge32 = sr >= 32; - - // Compute result for 0 <= sr < 32 - rhi0 = mhi >> (sr &31); - rlo0 = mlo >> (sr &31); - temp = mhi << (32 - sr); - temp |= rlo0; - rlo0 = sr ? temp : rlo0; - - // Compute result for 32 <= sr - rhi1 = 0; - rlo1 = srge64 ? 0 : rhi0; - - // Pick between the 2 results - rhi = srge32 ? rhi1 : rhi0; - rlo = srge32 ? rlo1 : rlo0; - - // Optional saturate on overflow - srlt0 = sr < 0; - rhi = srlt0 ? MAXVALUE : rhi; - rlo = srlt0 ? MAXVALUE : rlo; - - // Create long - res = LCREATE( rlo, rhi ); - - // Deal with sign bit (ignoring whether result is signed or unsigned value) - if (includeSign) { - sign = ((signed int) xhi) >> 31; fill with sign bit - sign = LCREATE( sign, sign ); - res += sign; - res ^= sign; - } - - return res; - } - */ - SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); - SDValue c32 = DAG.getConstant( 32, INTVT ); - - // Convert d in to 32-bit components - SDValue d = RHS; - SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); - SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); - SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); - - // Generate 'normalized' mantissa - SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, - xhi, DAG.getConstant( 0x00100000, INTVT ) ); - mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); - SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, - xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); - mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); - SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 ); - - // Compute shift right count from exponent - SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, - xhi, DAG.getConstant( 52-32, INTVT ) ); - e = DAG.getNode( ISD::AND, DL, INTVT, - e, DAG.getConstant( 0x7ff, INTVT ) ); - SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, - DAG.getConstant( 1023 + 63, INTVT ), e ); - SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT, - DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), - sr, DAG.getConstant(64, INTVT)); - SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, - DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), - sr, DAG.getConstant(32, INTVT)); - - // Compute result for 0 <= sr < 32 - SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); - SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr ); - temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr ); - temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp ); - temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp ); - rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 ); - - // Compute result for 32 <= sr - SDValue rhi1 = DAG.getConstant( 0, INTVT ); - SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - srge64, rhi1, rhi0 ); - - // Pick between the 2 results - SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - srge32, rhi1, rhi0 ); - SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - srge32, rlo1, rlo0 ); - - // Create long - SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); - - // Deal with sign bit - if (includeSign) { - SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, - xhi, DAG.getConstant( 31, INTVT ) ); - sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign ); - res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign ); - res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign ); - } - DST = res; - } - return DST; -} -SDValue -AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG, - bool includeSign) const -{ - EVT INTVT; - EVT LONGVT; - DebugLoc DL = RHS.getDebugLoc(); - EVT RHSVT = RHS.getValueType(); - bool isVec = RHSVT.isVector(); - if (isVec) { - LONGVT = EVT(MVT::getVectorVT(MVT::i64, - RHSVT.getVectorNumElements())); - INTVT = EVT(MVT::getVectorVT(MVT::i32, - RHSVT.getVectorNumElements())); - } else { - LONGVT = EVT(MVT::i64); - INTVT = EVT(MVT::i32); - } - /* - __attribute__((always_inline)) int - cast_f64_to_[u|i]32(double d) - { - // Convert d in to 32-bit components - long x = as_long(d); - xhi = LCOMPHI(x); - xlo = LCOMPLO(x); - - // Generate 'normalized' mantissa - mhi = xhi | 0x00100000; // hidden bit - mhi <<= 11; - temp = xlo >> (32 - 11); - mhi |= temp - - // Compute shift right count from exponent - e = (xhi >> (52-32)) & 0x7ff; - sr = 1023 + 31 - e; - srge32 = sr >= 32; - - // Compute result for 0 <= sr < 32 - res = mhi >> (sr &31); - res = srge32 ? 0 : res; - - // Optional saturate on overflow - srlt0 = sr < 0; - res = srlt0 ? MAXVALUE : res; - - // Deal with sign bit (ignoring whether result is signed or unsigned value) - if (includeSign) { - sign = ((signed int) xhi) >> 31; fill with sign bit - res += sign; - res ^= sign; - } - - return res; - } - */ - SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); - - // Convert d in to 32-bit components - SDValue d = RHS; - SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); - SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); - SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); - - // Generate 'normalized' mantissa - SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, - xhi, DAG.getConstant( 0x00100000, INTVT ) ); - mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); - SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, - xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); - mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); - - // Compute shift right count from exponent - SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, - xhi, DAG.getConstant( 52-32, INTVT ) ); - e = DAG.getNode( ISD::AND, DL, INTVT, - e, DAG.getConstant( 0x7ff, INTVT ) ); - SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, - DAG.getConstant( 1023 + 31, INTVT ), e ); - SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, - DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), - sr, DAG.getConstant(32, INTVT)); - - // Compute result for 0 <= sr < 32 - SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); - res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - srge32, DAG.getConstant(0,INTVT), res ); - - // Deal with sign bit - if (includeSign) { - SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, - xhi, DAG.getConstant( 31, INTVT ) ); - res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign ); - res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign ); - } - return res; -} - -SDValue -AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const -{ - SDValue DST; - SDValue RHS = Op.getOperand(0); - EVT RHSVT = RHS.getValueType(); - MVT RST = RHSVT.getScalarType().getSimpleVT(); - EVT LHSVT = Op.getValueType(); - MVT LST = LHSVT.getScalarType().getSimpleVT(); - DebugLoc DL = Op.getDebugLoc(); - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (RST == MVT::f64 && RHSVT.isVector() - && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - // We dont support vector 64bit floating point convertions. - for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { - SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); - op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); - if (!x) { - DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); - } else { - DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, - DST, op, DAG.getTargetConstant(x, MVT::i32)); - } - - } - } else { - if (RST == MVT::f64 - && LST == MVT::i32) { - if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - DST = SDValue(Op.getNode(), 0); - } else { - DST = genf64toi32(RHS, DAG, false); - } - } else if (RST == MVT::f64 - && LST == MVT::i64) { - DST = genf64toi64(RHS, DAG, false); - } else if (RST == MVT::f64 - && (LST == MVT::i8 || LST == MVT::i16)) { - if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); - } else { - SDValue ToInt = genf64toi32(RHS, DAG, false); - DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); - } - - } else { - DST = SDValue(Op.getNode(), 0); - } - } - return DST; -} -SDValue -AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, - SelectionDAG &DAG) const -{ - EVT RHSVT = RHS.getValueType(); - DebugLoc DL = RHS.getDebugLoc(); - EVT INTVT; - EVT LONGVT; - bool isVec = RHSVT.isVector(); - if (isVec) { - LONGVT = EVT(MVT::getVectorVT(MVT::i64, - RHSVT.getVectorNumElements())); - INTVT = EVT(MVT::getVectorVT(MVT::i32, - RHSVT.getVectorNumElements())); - } else { - LONGVT = EVT(MVT::i64); - INTVT = EVT(MVT::i32); - } - SDValue x = RHS; - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (STM.calVersion() >= CAL_VERSION_SC_135) { - // unsigned x = RHS; - // ulong xd = (ulong)(0x4330_0000 << 32) | x; - // double d = as_double( xd ); - // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000 - SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x, - DAG.getConstant( 0x43300000, INTVT ) ); - SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); - SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT, - DAG.getConstant( 0x4330000000000000ULL, LONGVT ) ); - return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd ); - } else { - SDValue clz = genCLZu32(x, DAG); - - // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2 - // Except for an input 0... which requires a 0 exponent - SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, - DAG.getConstant( (1023+31), INTVT), clz ); - exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x ); - - // Normalize frac - SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz ); - - // Eliminate hidden bit - rhi = DAG.getNode( ISD::AND, DL, INTVT, - rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); - - // Pack exponent and frac - SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT, - rhi, DAG.getConstant( (32 - 11), INTVT ) ); - rhi = DAG.getNode( ISD::SRL, DL, INTVT, - rhi, DAG.getConstant( 11, INTVT ) ); - exp = DAG.getNode( ISD::SHL, DL, INTVT, - exp, DAG.getConstant( 20, INTVT ) ); - rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); - - // Convert 2 x 32 in to 1 x 64, then to double precision float type - SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); - return DAG.getNode(ISDBITCAST, DL, LHSVT, res); - } -} -SDValue -AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT, - SelectionDAG &DAG) const -{ - EVT RHSVT = RHS.getValueType(); - DebugLoc DL = RHS.getDebugLoc(); - EVT INTVT; - EVT LONGVT; - bool isVec = RHSVT.isVector(); - if (isVec) { - INTVT = EVT(MVT::getVectorVT(MVT::i32, - RHSVT.getVectorNumElements())); - } else { - INTVT = EVT(MVT::i32); - } - LONGVT = RHSVT; - SDValue x = RHS; - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - // double dhi = (double)(as_uint2(x).y); - // double dlo = (double)(as_uint2(x).x); - // return mad(dhi, 0x1.0p+32, dlo) - SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x); - dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi); - SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x); - dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo); - return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi, - DAG.getConstantFP(0x4f800000, LHSVT), dlo); - } else if (STM.calVersion() >= CAL_VERSION_SC_135) { - // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL)); - // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32)); - // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo; - SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL - SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) ); - SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); - SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32 - SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) ); - SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe ); - SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT, - DAG.getConstant( 0x4530000000100000ULL, LONGVT ) ); - hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c ); - return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo ); - - } else { - SDValue clz = genCLZu64(x, DAG); - SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); - SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); - - // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2 - SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, - DAG.getConstant( (1023+63), INTVT), clz ); - SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo ); - exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - mash, exp, mash ); // exp = exp, or 0 if input was 0 - - // Normalize frac - SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT, - clz, DAG.getConstant( 31, INTVT ) ); - SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT, - DAG.getConstant( 32, INTVT ), clz31 ); - SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 ); - SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift ); - t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 ); - SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 ); - SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); - SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); - SDValue rlo2 = DAG.getConstant( 0, INTVT ); - SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT, - clz, DAG.getConstant( 32, INTVT ) ); - SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - clz32, rhi2, rhi1 ); - SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, - clz32, rlo2, rlo1 ); - - // Eliminate hidden bit - rhi = DAG.getNode( ISD::AND, DL, INTVT, - rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); - - // Save bits needed to round properly - SDValue round = DAG.getNode( ISD::AND, DL, INTVT, - rlo, DAG.getConstant( 0x7ff, INTVT ) ); - - // Pack exponent and frac - rlo = DAG.getNode( ISD::SRL, DL, INTVT, - rlo, DAG.getConstant( 11, INTVT ) ); - SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT, - rhi, DAG.getConstant( (32 - 11), INTVT ) ); - rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp ); - rhi = DAG.getNode( ISD::SRL, DL, INTVT, - rhi, DAG.getConstant( 11, INTVT ) ); - exp = DAG.getNode( ISD::SHL, DL, INTVT, - exp, DAG.getConstant( 20, INTVT ) ); - rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); - - // Compute rounding bit - SDValue even = DAG.getNode( ISD::AND, DL, INTVT, - rlo, DAG.getConstant( 1, INTVT ) ); - SDValue grs = DAG.getNode( ISD::AND, DL, INTVT, - round, DAG.getConstant( 0x3ff, INTVT ) ); - grs = DAG.getNode( AMDILISD::CMP, DL, INTVT, - DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32), - grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none - grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even ); - round = DAG.getNode( ISD::SRL, DL, INTVT, - round, DAG.getConstant( 10, INTVT ) ); - round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1 - - // Add rounding bit - SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, - round, DAG.getConstant( 0, INTVT ) ); - SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); - res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround ); - return DAG.getNode(ISDBITCAST, DL, LHSVT, res); - } -} -SDValue -AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const -{ - SDValue RHS = Op.getOperand(0); - EVT RHSVT = RHS.getValueType(); - MVT RST = RHSVT.getScalarType().getSimpleVT(); - EVT LHSVT = Op.getValueType(); - MVT LST = LHSVT.getScalarType().getSimpleVT(); - DebugLoc DL = Op.getDebugLoc(); - SDValue DST; - EVT INTVT; - EVT LONGVT; - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (LST == MVT::f64 && LHSVT.isVector() - && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - // We dont support vector 64bit floating point convertions. - DST = Op; - for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { - SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); - op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); - if (!x) { - DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); - } else { - DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, - op, DAG.getTargetConstant(x, MVT::i32)); - } - - } - } else { - - if (RST == MVT::i32 - && LST == MVT::f64) { - if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - DST = SDValue(Op.getNode(), 0); - } else { - DST = genu32tof64(RHS, LHSVT, DAG); - } - } else if (RST == MVT::i64 - && LST == MVT::f64) { - DST = genu64tof64(RHS, LHSVT, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - } - return DST; -} - -SDValue -AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const -{ - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - SDValue DST; - bool isVec = RHS.getValueType().isVector(); - if (OVT.getScalarType() == MVT::i64) { - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i64) { - INTTY = MVT::v2i32; - } - SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; - // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 - LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); - RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); - LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); - RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS); - INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO); - INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI); - //TODO: need to use IBORROW on HD5XXX and later hardware - SDValue cmp; - if (OVT == MVT::i64) { - cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), - LHSLO, RHSLO); - } else { - SDValue cmplo; - SDValue cmphi; - SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32)); - SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32)); - SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32)); - SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32)); - cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32, - DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), - LHSRLO, RHSRLO); - cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32, - DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), - LHSRHI, RHSRHI); - cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo); - cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32, - cmp, cmphi, DAG.getTargetConstant(1, MVT::i32)); - } - INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp); - DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT, - INTLO, INTHI); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} -SDValue -AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const -{ - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::f64) { - DST = LowerFDIV64(Op, DAG); - } else if (OVT.getScalarType() == MVT::f32) { - DST = LowerFDIV32(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - -SDValue AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { EVT OVT = Op.getValueType(); @@ -2756,88 +1832,6 @@ AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const } SDValue -AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const -{ - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::i64) { - DST = LowerUREM64(Op, DAG); - } else if (OVT.getScalarType() == MVT::i32) { - DST = LowerUREM32(Op, DAG); - } else if (OVT.getScalarType() == MVT::i16) { - DST = LowerUREM16(Op, DAG); - } else if (OVT.getScalarType() == MVT::i8) { - DST = LowerUREM8(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - -SDValue -AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - SDValue DST; - bool isVec = OVT.isVector(); - if (OVT.getScalarType() != MVT::i64) - { - DST = SDValue(Op.getNode(), 0); - } else { - assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!"); - // TODO: This needs to be turned into a tablegen pattern - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i64) { - INTTY = MVT::v2i32; - } - // mul64(h1, l1, h0, l0) - SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, - DL, - INTTY, LHS); - SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, - DL, - INTTY, LHS); - SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, - DL, - INTTY, RHS); - SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, - DL, - INTTY, RHS); - // MULLO_UINT_1 r1, h0, l1 - SDValue RHILLO = DAG.getNode(AMDILISD::UMUL, - DL, - INTTY, RHSHI, LHSLO); - // MULLO_UINT_1 r2, h1, l0 - SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL, - DL, - INTTY, RHSLO, LHSHI); - // ADD_INT hr, r1, r2 - SDValue ADDHI = DAG.getNode(ISD::ADD, - DL, - INTTY, RHILLO, RLOHHI); - // MULHI_UINT_1 r3, l1, l0 - SDValue RLOLLO = DAG.getNode(ISD::MULHU, - DL, - INTTY, RHSLO, LHSLO); - // ADD_INT hr, hr, r3 - SDValue HIGH = DAG.getNode(ISD::ADD, - DL, - INTTY, ADDHI, RLOLLO); - // MULLO_UINT_1 l3, l1, l0 - SDValue LOW = DAG.getNode(AMDILISD::UMUL, - DL, - INTTY, LHSLO, RHSLO); - DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, - DL, - OVT, LOW, HIGH); - } - return DST; -} -SDValue AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const { EVT VT = Op.getValueType(); @@ -2904,164 +1898,6 @@ AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const } SDValue -AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - const SDValue *ptr = NULL; - const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2)); - uint32_t swizzleNum = 0; - SDValue DST; - if (!VT.isVector()) { - SDValue Res = Op.getOperand(0); - return Res; - } - - if (Op.getOperand(1).getOpcode() != ISD::UNDEF) { - ptr = &Op.getOperand(1); - } else { - ptr = &Op.getOperand(0); - } - if (CSDN) { - swizzleNum = (uint32_t)CSDN->getZExtValue(); - uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); - uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); - DST = DAG.getNode(AMDILISD::VINSERT, - DL, - VT, - Op.getOperand(0), - *ptr, - DAG.getTargetConstant(mask2, MVT::i32), - DAG.getTargetConstant(mask3, MVT::i32)); - } else { - uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); - uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); - SDValue res = DAG.getNode(AMDILISD::VINSERT, - DL, VT, Op.getOperand(0), *ptr, - DAG.getTargetConstant(mask2, MVT::i32), - DAG.getTargetConstant(mask3, MVT::i32)); - for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) { - mask2 = 0x04030201 & ~(0xFF << (x * 8)); - mask3 = 0x01010101 & (0xFF << (x * 8)); - SDValue t = DAG.getNode(AMDILISD::VINSERT, - DL, VT, Op.getOperand(0), *ptr, - DAG.getTargetConstant(mask2, MVT::i32), - DAG.getTargetConstant(mask3, MVT::i32)); - SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(), - DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), - Op.getOperand(2), DAG.getConstant(x, MVT::i32)); - c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c); - res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res); - } - DST = res; - } - return DST; -} - -SDValue -AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const -{ - EVT VT = Op.getValueType(); - const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - uint64_t swizzleNum = 0; - DebugLoc DL = Op.getDebugLoc(); - SDValue Res; - if (!Op.getOperand(0).getValueType().isVector()) { - Res = Op.getOperand(0); - return Res; - } - if (CSDN) { - // Static vector extraction - swizzleNum = CSDN->getZExtValue() + 1; - Res = DAG.getNode(AMDILISD::VEXTRACT, - DL, VT, - Op.getOperand(0), - DAG.getTargetConstant(swizzleNum, MVT::i32)); - } else { - SDValue Op1 = Op.getOperand(1); - uint32_t vecSize = 4; - SDValue Op0 = Op.getOperand(0); - SDValue res = DAG.getNode(AMDILISD::VEXTRACT, - DL, VT, Op0, - DAG.getTargetConstant(1, MVT::i32)); - if (Op0.getValueType().isVector()) { - vecSize = Op0.getValueType().getVectorNumElements(); - } - for (uint32_t x = 2; x <= vecSize; ++x) { - SDValue t = DAG.getNode(AMDILISD::VEXTRACT, - DL, VT, Op0, - DAG.getTargetConstant(x, MVT::i32)); - SDValue c = DAG.getNode(AMDILISD::CMP, - DL, Op1.getValueType(), - DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), - Op1, DAG.getConstant(x, MVT::i32)); - res = DAG.getNode(AMDILISD::CMOVLOG, DL, - VT, c, t, res); - - } - Res = res; - } - return Res; -} - -SDValue -AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, - SelectionDAG &DAG) const -{ - uint32_t vecSize = Op.getValueType().getVectorNumElements(); - SDValue src = Op.getOperand(0); - const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - uint64_t offset = 0; - EVT vecType = Op.getValueType().getVectorElementType(); - DebugLoc DL = Op.getDebugLoc(); - SDValue Result; - if (CSDN) { - offset = CSDN->getZExtValue(); - Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL,vecType, src, DAG.getConstant(offset, MVT::i32)); - Result = DAG.getNode(AMDILISD::VBUILD, DL, - Op.getValueType(), Result); - for (uint32_t x = 1; x < vecSize; ++x) { - SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, - src, DAG.getConstant(offset + x, MVT::i32)); - if (elt.getOpcode() != ISD::UNDEF) { - Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, - Op.getValueType(), Result, elt, - DAG.getConstant(x, MVT::i32)); - } - } - } else { - SDValue idx = Op.getOperand(1); - Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, vecType, src, idx); - Result = DAG.getNode(AMDILISD::VBUILD, DL, - Op.getValueType(), Result); - for (uint32_t x = 1; x < vecSize; ++x) { - idx = DAG.getNode(ISD::ADD, DL, vecType, - idx, DAG.getConstant(1, MVT::i32)); - SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, - src, idx); - if (elt.getOpcode() != ISD::UNDEF) { - Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, - Op.getValueType(), Result, elt, idx); - } - } - } - return Result; -} -SDValue -AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const -{ - SDValue Res = DAG.getNode(AMDILISD::VBUILD, - Op.getDebugLoc(), - Op.getValueType(), - Op.getOperand(0)); - return Res; -} -SDValue AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); @@ -3221,29 +2057,6 @@ AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const return Result; } -SDValue -AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Result = DAG.getNode( - AMDILISD::DP_TO_FP, - Op.getDebugLoc(), - Op.getValueType(), - Op.getOperand(0), - Op.getOperand(1)); - return Result; -} - -SDValue -AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const -{ - SDValue Result = DAG.getNode( - AMDILISD::VCONCAT, - Op.getDebugLoc(), - Op.getValueType(), - Op.getOperand(0), - Op.getOperand(1)); - return Result; -} // LowerRET - Lower an ISD::RET node. SDValue AMDILTargetLowering::LowerReturn(SDValue Chain, @@ -3578,425 +2391,3 @@ AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { return SDValue(Op.getNode(), 0); } - -SDValue -AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i8) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4i8) { - INTTY = MVT::v4i32; - } - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerUREM8 function generates equivalent to the following IL. - // mov r0, as_u32(LHS) - // mov r1, as_u32(RHS) - // and r10, r0, 0xFF - // and r11, r1, 0xFF - // cmov_logical r3, r11, r11, 0x1 - // udiv r3, r10, r3 - // cmov_logical r3, r11, r3, 0 - // umul r3, r3, r11 - // sub r3, r10, r3 - // and as_u8(DST), r3, 0xFF - - // mov r0, as_u32(LHS) - SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); - - // mov r1, as_u32(RHS) - SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); - - // and r10, r0, 0xFF - SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, - DAG.getConstant(0xFF, INTTY)); - - // and r11, r1, 0xFF - SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, - DAG.getConstant(0xFF, INTTY)); - - // cmov_logical r3, r11, r11, 0x1 - SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, - DAG.getConstant(0x01, INTTY)); - - // udiv r3, r10, r3 - r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); - - // cmov_logical r3, r11, r3, 0 - r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, - DAG.getConstant(0, INTTY)); - - // umul r3, r3, r11 - r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); - - // sub r3, r10, r3 - r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); - - // and as_u8(DST), r3, 0xFF - SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3, - DAG.getConstant(0xFF, INTTY)); - DST = DAG.getZExtOrTrunc(DST, DL, OVT); - return DST; -} - -SDValue -AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i16) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4i16) { - INTTY = MVT::v4i32; - } - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerUREM16 function generatest equivalent to the following IL. - // mov r0, LHS - // mov r1, RHS - // DIV = LowerUDIV16(LHS, RHS) - // and r10, r0, 0xFFFF - // and r11, r1, 0xFFFF - // cmov_logical r3, r11, r11, 0x1 - // udiv as_u16(r3), as_u32(r10), as_u32(r3) - // and r3, r3, 0xFFFF - // cmov_logical r3, r11, r3, 0 - // umul r3, r3, r11 - // sub r3, r10, r3 - // and DST, r3, 0xFFFF - - // mov r0, LHS - SDValue r0 = LHS; - - // mov r1, RHS - SDValue r1 = RHS; - - // and r10, r0, 0xFFFF - SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, - DAG.getConstant(0xFFFF, OVT)); - - // and r11, r1, 0xFFFF - SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, - DAG.getConstant(0xFFFF, OVT)); - - // cmov_logical r3, r11, r11, 0x1 - SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, - DAG.getConstant(0x01, OVT)); - - // udiv as_u16(r3), as_u32(r10), as_u32(r3) - r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); - r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); - r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); - r3 = DAG.getZExtOrTrunc(r3, DL, OVT); - r10 = DAG.getZExtOrTrunc(r10, DL, OVT); - - // and r3, r3, 0xFFFF - r3 = DAG.getNode(ISD::AND, DL, OVT, r3, - DAG.getConstant(0xFFFF, OVT)); - - // cmov_logical r3, r11, r3, 0 - r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, - DAG.getConstant(0, OVT)); - // umul r3, r3, r11 - r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); - - // sub r3, r10, r3 - r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); - - // and DST, r3, 0xFFFF - SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, - DAG.getConstant(0xFFFF, OVT)); - return DST; -} - -SDValue -AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerUREM32 function generates equivalent to the following IL. - // udiv r20, LHS, RHS - // umul r20, r20, RHS - // sub DST, LHS, r20 - - // udiv r20, LHS, RHS - SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); - - // umul r20, r20, RHS - r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); - - // sub DST, LHS, r20 - SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); - return DST; -} - -SDValue -AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const -{ - return SDValue(Op.getNode(), 0); -} - - -SDValue -AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const -{ - DebugLoc DL = Op.getDebugLoc(); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2f32) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4f32) { - INTTY = MVT::v4i32; - } - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue DST; - const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); - if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { - // TODO: This doesn't work for vector types yet - // The LowerFDIV32 function generates equivalent to the following - // IL: - // mov r20, as_int(LHS) - // mov r21, as_int(RHS) - // and r30, r20, 0x7f800000 - // and r31, r20, 0x807FFFFF - // and r32, r21, 0x7f800000 - // and r33, r21, 0x807FFFFF - // ieq r40, r30, 0x7F800000 - // ieq r41, r31, 0x7F800000 - // ieq r42, r32, 0 - // ieq r43, r33, 0 - // and r50, r20, 0x80000000 - // and r51, r21, 0x80000000 - // ior r32, r32, 0x3f800000 - // ior r33, r33, 0x3f800000 - // cmov_logical r32, r42, r50, r32 - // cmov_logical r33, r43, r51, r33 - // cmov_logical r32, r40, r20, r32 - // cmov_logical r33, r41, r21, r33 - // ior r50, r40, r41 - // ior r51, r42, r43 - // ior r50, r50, r51 - // inegate r52, r31 - // iadd r30, r30, r52 - // cmov_logical r30, r50, 0, r30 - // div_zeroop(infinity) r21, 1.0, r33 - // mul_ieee r20, r32, r21 - // and r22, r20, 0x7FFFFFFF - // and r23, r20, 0x80000000 - // ishr r60, r22, 0x00000017 - // ishr r61, r30, 0x00000017 - // iadd r20, r20, r30 - // iadd r21, r22, r30 - // iadd r60, r60, r61 - // ige r42, 0, R60 - // ior r41, r23, 0x7F800000 - // ige r40, r60, 0x000000FF - // cmov_logical r40, r50, 0, r40 - // cmov_logical r20, r42, r23, r20 - // cmov_logical DST, r40, r41, r20 - // as_float(DST) - - // mov r20, as_int(LHS) - SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); - - // mov r21, as_int(RHS) - SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); - - // and r30, r20, 0x7f800000 - SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, - DAG.getConstant(0x7F800000, INTTY)); - - // and r31, r21, 0x7f800000 - SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, - DAG.getConstant(0x7f800000, INTTY)); - - // and r32, r20, 0x807FFFFF - SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, - DAG.getConstant(0x807FFFFF, INTTY)); - - // and r33, r21, 0x807FFFFF - SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, - DAG.getConstant(0x807FFFFF, INTTY)); - - // ieq r40, r30, 0x7F800000 - SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - R30, DAG.getConstant(0x7F800000, INTTY)); - - // ieq r41, r31, 0x7F800000 - SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - R31, DAG.getConstant(0x7F800000, INTTY)); - - // ieq r42, r30, 0 - SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - R30, DAG.getConstant(0, INTTY)); - - // ieq r43, r31, 0 - SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), - R31, DAG.getConstant(0, INTTY)); - - // and r50, r20, 0x80000000 - SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, - DAG.getConstant(0x80000000, INTTY)); - - // and r51, r21, 0x80000000 - SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, - DAG.getConstant(0x80000000, INTTY)); - - // ior r32, r32, 0x3f800000 - R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, - DAG.getConstant(0x3F800000, INTTY)); - - // ior r33, r33, 0x3f800000 - R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, - DAG.getConstant(0x3F800000, INTTY)); - - // cmov_logical r32, r42, r50, r32 - R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); - - // cmov_logical r33, r43, r51, r33 - R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); - - // cmov_logical r32, r40, r20, r32 - R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); - - // cmov_logical r33, r41, r21, r33 - R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); - - // ior r50, r40, r41 - R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); - - // ior r51, r42, r43 - R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); - - // ior r50, r50, r51 - R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); - - // inegate r52, r31 - SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); - - // iadd r30, r30, r52 - R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); - - // cmov_logical r30, r50, 0, r30 - R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, - DAG.getConstant(0, INTTY), R30); - - // div_zeroop(infinity) r21, 1.0, as_float(r33) - R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); - R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, - DAG.getConstantFP(1.0f, OVT), R33); - - // mul_ieee as_int(r20), as_float(r32), r21 - R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); - R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); - R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); - - // div_zeroop(infinity) r21, 1.0, as_float(r33) - R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); - R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, - DAG.getConstantFP(1.0f, OVT), R33); - - // mul_ieee as_int(r20), as_float(r32), r21 - R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); - R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); - R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); - - // and r22, r20, 0x7FFFFFFF - SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, - DAG.getConstant(0x7FFFFFFF, INTTY)); - - // and r23, r20, 0x80000000 - SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, - DAG.getConstant(0x80000000, INTTY)); - - // ishr r60, r22, 0x00000017 - SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, - DAG.getConstant(0x00000017, INTTY)); - - // ishr r61, r30, 0x00000017 - SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, - DAG.getConstant(0x00000017, INTTY)); - - // iadd r20, r20, r30 - R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); - - // iadd r21, r22, r30 - R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); - - // iadd r60, r60, r61 - R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); - - // ige r42, 0, R60 - R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), - DAG.getConstant(0, INTTY), - R60); - - // ior r41, r23, 0x7F800000 - R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, - DAG.getConstant(0x7F800000, INTTY)); - - // ige r40, r60, 0x000000FF - R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, - DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), - R60, - DAG.getConstant(0x0000000FF, INTTY)); - - // cmov_logical r40, r50, 0, r40 - R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, - DAG.getConstant(0, INTTY), - R40); - - // cmov_logical r20, r42, r23, r20 - R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); - - // cmov_logical DST, r40, r41, r20 - DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); - - // as_float(DST) - DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); - } else { - // The following sequence of DAG nodes produce the following IL: - // fabs r1, RHS - // lt r2, 0x1.0p+96f, r1 - // cmov_logical r3, r2, 0x1.0p-23f, 1.0f - // mul_ieee r1, RHS, r3 - // div_zeroop(infinity) r0, LHS, r1 - // mul_ieee DST, r0, r3 - - // fabs r1, RHS - SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); - // lt r2, 0x1.0p+96f, r1 - SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, - DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), - DAG.getConstant(0x6f800000, INTTY), r1); - // cmov_logical r3, r2, 0x1.0p-23f, 1.0f - SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, - DAG.getConstant(0x2f800000, INTTY), - DAG.getConstant(0x3f800000, INTTY)); - // mul_ieee r1, RHS, r3 - r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); - // div_zeroop(infinity) r0, LHS, r1 - SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); - // mul_ieee DST, r0, r3 - DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); - } - return DST; -} - -SDValue -AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const -{ - return SDValue(Op.getNode(), 0); -} diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.h b/src/gallium/drivers/radeon/AMDILISelLowering.h index f2b68d13c12..612ca17eb6e 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.h +++ b/src/gallium/drivers/radeon/AMDILISelLowering.h @@ -220,9 +220,6 @@ namespace llvm virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - int - getVarArgsFrameOffset() const; - /// computeMaskedBitsForTargetNode - Determine which of /// the bits specified /// in Mask are known to be either zero or one and return them in @@ -308,34 +305,6 @@ namespace llvm const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - //+++--- Function dealing with conversions between floating point and - //integer types ---+++// - SDValue - genCLZu64(SDValue Op, SelectionDAG &DAG) const; - SDValue - genCLZuN(SDValue Op, SelectionDAG &DAG, uint32_t bits) const; - SDValue - genCLZu32(SDValue Op, SelectionDAG &DAG) const; - SDValue - genf64toi32(SDValue Op, SelectionDAG &DAG, - bool includeSign) const; - - SDValue - genf64toi64(SDValue Op, SelectionDAG &DAG, - bool includeSign) const; - - SDValue - genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const; - - SDValue - genu64tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const; - - SDValue - LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; - - SDValue - LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -349,9 +318,6 @@ namespace llvm LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue - LowerSUB(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const; @@ -363,17 +329,6 @@ namespace llvm LowerSREM64(SDValue Op, SelectionDAG &DAG) const; SDValue - LowerUREM(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerUREM8(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerUREM16(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerUREM32(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerUREM64(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; @@ -383,34 +338,9 @@ namespace llvm LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; SDValue - LowerFDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; - - SDValue - LowerMUL(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue - LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - - SDValue - LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - - SDValue - LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - - SDValue - LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; - - SDValue - LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 9f9f348fc9e..bb034beeb33 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -32,11 +32,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass); computeRegisterProperties(); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::FSUB, MVT::f32, Expand); } diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index c8a0e0b036e..5b1959d5024 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -33,9 +33,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); - setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); |