summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon/AMDILISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon/AMDILISelLowering.cpp')
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp677
1 files changed, 0 insertions, 677 deletions
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
deleted file mode 100644
index 993025c3d47..00000000000
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ /dev/null
@@ -1,677 +0,0 @@
-//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file contains TargetLowering functions borrowed from AMDLI.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDILDevices.h"
-#include "AMDILIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-#include "AMDGPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation Help Functions End
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Class Implementation Begins
-//===----------------------------------------------------------------------===//
-void AMDGPUTargetLowering::InitAMDILLowering()
-{
- int types[] =
- {
- (int)MVT::i8,
- (int)MVT::i16,
- (int)MVT::i32,
- (int)MVT::f32,
- (int)MVT::f64,
- (int)MVT::i64,
- (int)MVT::v2i8,
- (int)MVT::v4i8,
- (int)MVT::v2i16,
- (int)MVT::v4i16,
- (int)MVT::v4f32,
- (int)MVT::v4i32,
- (int)MVT::v2f32,
- (int)MVT::v2i32,
- (int)MVT::v2f64,
- (int)MVT::v2i64
- };
-
- int IntTypes[] =
- {
- (int)MVT::i8,
- (int)MVT::i16,
- (int)MVT::i32,
- (int)MVT::i64
- };
-
- int FloatTypes[] =
- {
- (int)MVT::f32,
- (int)MVT::f64
- };
-
- int VectorTypes[] =
- {
- (int)MVT::v2i8,
- (int)MVT::v4i8,
- (int)MVT::v2i16,
- (int)MVT::v4i16,
- (int)MVT::v4f32,
- (int)MVT::v4i32,
- (int)MVT::v2f32,
- (int)MVT::v2i32,
- (int)MVT::v2f64,
- (int)MVT::v2i64
- };
- size_t numTypes = sizeof(types) / sizeof(*types);
- size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
- size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
- size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
-
- const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
- // These are the current register classes that are
- // supported
-
- for (unsigned int x = 0; x < numTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
-
- //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
- // We cannot sextinreg, expand to shifts
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
- setOperationAction(ISD::SUBE, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::BRCOND, VT, Custom);
- setOperationAction(ISD::BR_JT, VT, Expand);
- setOperationAction(ISD::BRIND, VT, Expand);
- // TODO: Implement custom UREM/SREM routines
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- if (VT != MVT::i64 && VT != MVT::v2i64) {
- setOperationAction(ISD::SDIV, VT, Custom);
- }
- }
- for (unsigned int x = 0; x < numFloatTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
-
- // IL does not have these operations for floating point types
- setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
- setOperationAction(ISD::SETOLT, VT, Expand);
- setOperationAction(ISD::SETOGE, VT, Expand);
- setOperationAction(ISD::SETOGT, VT, Expand);
- setOperationAction(ISD::SETOLE, VT, Expand);
- setOperationAction(ISD::SETULT, VT, Expand);
- setOperationAction(ISD::SETUGE, VT, Expand);
- setOperationAction(ISD::SETUGT, VT, Expand);
- setOperationAction(ISD::SETULE, VT, Expand);
- }
-
- for (unsigned int x = 0; x < numIntTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
-
- // GPU also does not have divrem function for signed or unsigned
- setOperationAction(ISD::SDIVREM, VT, Expand);
-
- // GPU does not have [S|U]MUL_LOHI functions as a single instruction
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
- // GPU doesn't have a rotl, rotr, or byteswap instruction
- setOperationAction(ISD::ROTR, VT, Expand);
- setOperationAction(ISD::BSWAP, VT, Expand);
-
- // GPU doesn't have any counting operators
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- }
-
- for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
- {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
-
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- // setOperationAction(ISD::VSETCC, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
-
- }
- if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SREM, MVT::v2i64, Expand);
- setOperationAction(ISD::Constant , MVT::i64 , Legal);
- setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
- }
- if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
- // we support loading/storing v2f64 but not operations on the type
- setOperationAction(ISD::FADD, MVT::v2f64, Expand);
- setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
- setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
- // We want to expand vector conversions into their scalar
- // counterparts.
- setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::v2f64, Expand);
- }
- // TODO: Fix the UDIV24 algorithm so it works for these
- // types correctly. This needs vector comparisons
- // for this to work correctly.
- setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
- setOperationAction(ISD::SUBC, MVT::Other, Expand);
- setOperationAction(ISD::ADDE, MVT::Other, Expand);
- setOperationAction(ISD::ADDC, MVT::Other, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
-
-
- // Use the default implementation.
- setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
- setOperationAction(ISD::Constant , MVT::i32 , Legal);
-
- setSchedulingPreference(Sched::RegPressure);
- setPow2DivIsCheap(false);
- setPrefLoopAlignment(16);
- setSelectIsExpensive(true);
- setJumpIsExpensive(true);
-
- maxStoresPerMemcpy = 4096;
- maxStoresPerMemmove = 4096;
- maxStoresPerMemset = 4096;
-
-#undef numTypes
-#undef numIntTypes
-#undef numVectorTypes
-#undef numFloatTypes
-}
-
-bool
-AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const
-{
- return false;
-}
-// The backend supports 32 and 64 bit floating point immediates
-bool
-AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
-{
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return true;
- } else {
- return false;
- }
-}
-
-bool
-AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
-{
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return false;
- } else {
- return true;
- }
-}
-
-
-// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
-// be zero. Op is expected to be a target specific node. Used by DAG
-// combiner.
-
-void
-AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
- const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const
-{
- APInt KnownZero2;
- APInt KnownOne2;
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
- switch (Op.getOpcode()) {
- default: break;
- case ISD::SELECT_CC:
- DAG.ComputeMaskedBits(
- Op.getOperand(1),
- KnownZero,
- KnownOne,
- Depth + 1
- );
- DAG.ComputeMaskedBits(
- Op.getOperand(0),
- KnownZero2,
- KnownOne2
- );
- assert((KnownZero & KnownOne) == 0
- && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0
- && "Bits known to be one AND zero?");
- // Only known if known in both the LHS and RHS
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- break;
- };
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSDIV32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16
- || OVT.getScalarType() == MVT::i8) {
- DST = LowerSDIV24(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
-{
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSREM64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSREM32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16) {
- DST = LowerSREM16(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i8) {
- DST = LowerSREM8(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Data = Op.getOperand(0);
- VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
- DebugLoc DL = Op.getDebugLoc();
- EVT DVT = Data.getValueType();
- EVT BVT = BaseType->getVT();
- unsigned baseBits = BVT.getScalarType().getSizeInBits();
- unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
- unsigned shiftBits = srcBits - baseBits;
- if (srcBits < 32) {
- // If the op is less than 32 bits, then it needs to extend to 32bits
- // so it can properly keep the upper bits valid.
- EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
- Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
- shiftBits = 32 - baseBits;
- DVT = IVT;
- }
- SDValue Shift = DAG.getConstant(shiftBits, DVT);
- // Shift left by 'Shift' bits.
- Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
- // Signed shift Right by 'Shift' bits.
- Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
- if (srcBits < 32) {
- // Once the sign extension is done, the op needs to be converted to
- // its original type.
- Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
- }
- return Data;
-}
-EVT
-AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
-{
- int iSize = (size * numEle);
- int vEle = (iSize >> ((size == 64) ? 6 : 5));
- if (!vEle) {
- vEle = 1;
- }
- if (size == 64) {
- if (vEle == 1) {
- return EVT(MVT::i64);
- } else {
- return EVT(MVT::getVectorVT(MVT::i64, vEle));
- }
- } else {
- if (vEle == 1) {
- return EVT(MVT::i32);
- } else {
- return EVT(MVT::getVectorVT(MVT::i32, vEle));
- }
- }
-}
-
-SDValue
-AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Jump = Op.getOperand(2);
- SDValue Result;
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- Op.getDebugLoc(),
- Op.getValueType(),
- Chain, Jump, Cond);
- return Result;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
- unsigned bitsize = OVT.getScalarType().getSizeInBits();
- // char|short jq = ia ^ ib;
- SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
-
- // int ia = (int)LHS;
- SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-
- // int ib, (int)RHS;
- SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-
- // float fa = (float)ia;
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-
- // float fb = (float)ib;
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-
- // float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
-
- // fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-
- // float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
-
- // float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
-
- // int iq = (int)fq;
- SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
-
- // fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
-
- // fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-
- // int cv = fr >= fb;
- SDValue cv;
- if (INTTY == MVT::i32) {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- } else {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- }
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
- DAG.getConstant(0, OVT));
- // dst = iq + jq;
- iq = DAG.getSExtOrTrunc(iq, DL, OVT);
- iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
- return iq;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSDIV32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r0, r0, r1
- // ixor r10, r10, r11
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSelectCC(DL,
- r0, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSelectCC(DL,
- r1, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r0, r0, r1
- r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-
- // ixor r10, r10, r11
- r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i8) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i8) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i16) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i16) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSREM32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r20, r0, r1
- // umul r20, r20, r1
- // sub r0, r0, r20
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r20, r0, r1
- SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
-
- // umul r20, r20, r1
- r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
-
- // sub r0, r0, r20
- r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
-{
- return SDValue(Op.getNode(), 0);
-}