summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl 3
-rw-r--r-- src/gallium/drivers/radeon/AMDILISelLowering.cpp 169
-rw-r--r-- src/gallium/drivers/radeon/AMDILInstructions.td 2
-rw-r--r-- src/gallium/drivers/radeon/AMDILRegisterInfo.cpp 2
-rw-r--r-- src/gallium/drivers/radeon/R600InstrInfo.cpp 2
-rw-r--r-- src/gallium/drivers/radeon/R600Instructions.td 5
6 files changed, 4 insertions, 179 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
index d40495925d8..64a29285782 100644
--- a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
+++ b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
@@ -36,7 +36,6 @@ my @F32_MULTICLASSES = qw {
};
my @I32_MULTICLASSES = qw {
- BinaryOpMCi32
BinaryOpMCi32Const
};
@@ -53,7 +52,7 @@ my $FILE_TYPE = $ARGV[0];
open AMDIL, '<', 'AMDILInstructions.td';
-my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ');
+my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'SMULHI_i32', 'SMUL_i32');
while (<AMDIL>) {
if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) {
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
index 492c1812364..7a75c332189 100644
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -703,7 +703,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
setOperationAction(ISD::FP_ROUND, VT, Expand);
setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1584,7 +1583,6 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
LOWER(FP_TO_UINT);
LOWER(SINT_TO_FP);
LOWER(UINT_TO_FP);
- LOWER(ADD);
LOWER(MUL);
LOWER(SUB);
LOWER(FDIV);
@@ -2002,175 +2000,8 @@ const
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
-static void checkMADType(
- SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
-{
- bool globalLoadStore = false;
- is24bitMAD = false;
- is32bitMAD = false;
- return;
- assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
- "this to work correctly!");
- if (Op.getNode()->use_empty()) {
- return;
- }
- for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
- nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
- SDNode *ptr = *nBegin;
- const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
- // If we are not a LSBaseSDNode then we don't do this
- // optimization.
- // If we are a LSBaseSDNode, but the op is not the offset
- // or base pointer, then we don't do this optimization
- // (i.e. we are the value being stored)
- if (!lsNode ||
- (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
- return;
- }
- const PointerType *PT =
- dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
- unsigned as = PT->getAddressSpace();
- switch(as) {
- default:
- globalLoadStore = true;
- case AMDILAS::PRIVATE_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
- globalLoadStore = true;
- }
- break;
- case AMDILAS::CONSTANT_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
- globalLoadStore = true;
- }
- break;
- case AMDILAS::LOCAL_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
- globalLoadStore = true;
- }
- break;
- case AMDILAS::REGION_ADDRESS:
- if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
- globalLoadStore = true;
- }
- break;
- }
- }
- if (globalLoadStore) {
- is32bitMAD = true;
- } else {
- is24bitMAD = true;
- }
-}
SDValue
-AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
- EVT OVT = Op.getValueType();
- SDValue DST;
- const AMDILSubtarget *stm = &this->getTargetMachine()
- .getSubtarget<AMDILSubtarget>();
- bool isVec = OVT.isVector();
- if (OVT.getScalarType() == MVT::i64) {
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i64) {
- INTTY = MVT::v2i32;
- }
- if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
- && INTTY == MVT::i32) {
- DST = DAG.getNode(AMDILISD::ADD,
- DL,
- OVT,
- LHS, RHS);
- } else {
- SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
- // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
- LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
- RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
- LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
- RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
- INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
- INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
- SDValue cmp;
- cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- INTLO, RHSLO);
- cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
- INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
- DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
- INTLO, INTHI);
- }
- } else {
- if (LHS.getOpcode() == ISD::FrameIndex ||
- RHS.getOpcode() == ISD::FrameIndex) {
- DST = DAG.getNode(AMDILISD::ADDADDR,
- DL,
- OVT,
- LHS, RHS);
- } else {
- if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
- && LHS.getNumOperands()
- && RHS.getNumOperands()) {
- bool is24bitMAD = false;
- bool is32bitMAD = false;
- const ConstantSDNode *LHSConstOpCode =
- dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
- const ConstantSDNode *RHSConstOpCode =
- dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
- if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
- || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
- || LHS.getOpcode() == ISD::MUL
- || RHS.getOpcode() == ISD::MUL) {
- SDValue Op1, Op2, Op3;
- // FIXME: Fix this so that it works for unsigned 24bit ops.
- if (LHS.getOpcode() == ISD::MUL) {
- Op1 = LHS.getOperand(0);
- Op2 = LHS.getOperand(1);
- Op3 = RHS;
- } else if (RHS.getOpcode() == ISD::MUL) {
- Op1 = RHS.getOperand(0);
- Op2 = RHS.getOperand(1);
- Op3 = LHS;
- } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
- Op1 = LHS.getOperand(0);
- Op2 = DAG.getConstant(
- 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
- Op3 = RHS;
- } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
- Op1 = RHS.getOperand(0);
- Op2 = DAG.getConstant(
- 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
- Op3 = LHS;
- }
- checkMADType(Op, stm, is24bitMAD, is32bitMAD);
- // We can possibly do a MAD transform!
- if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
- uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
- SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
- DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
- DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
- Op1, Op2, Op3);
- } else if(is32bitMAD) {
- SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
- DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
- DL, Tys, DAG.getEntryNode(),
- DAG.getConstant(
- AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
- Op1, Op2, Op3);
- }
- }
- }
- DST = DAG.getNode(AMDILISD::ADD,
- DL,
- OVT,
- LHS, RHS);
- }
- }
- return DST;
-}
-SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
uint32_t bits) const
{
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td
index bd37ff0d195..85ece1e2f51 100644
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -57,9 +57,7 @@ defm SHL : BinaryOpMCi32Const<IL_OP_I_SHL, shl>;
defm SHR : BinaryOpMCi32Const<IL_OP_I_SHR, sra>;
defm SHLVEC : BinaryOpMCi32<IL_OP_I_SHL, shl>;
defm SHRVEC : BinaryOpMCi32<IL_OP_I_SHR, sra>;
-defm ADD : BinaryOpMCi32<IL_OP_I_ADD, add>;
// get rid of the addri via the tablegen instead of custom lowered instruction
-defm CUSTOM_ADD : BinaryOpMCi32<IL_OP_I_ADD, IL_add>;
defm EADD : BinaryOpMCi32<IL_OP_I_ADD, adde>;
def INTTOANY_i32: OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins GPRI32:$src0),
!strconcat(IL_OP_MOV.Text, " $dst, $src0"),
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
index d7c1dc74b8b..453889fc8aa 100644
--- a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
@@ -128,7 +128,7 @@ AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineOperand::CreateImm(Offset));
MI.getParent()->insert(II, nMI);
nMI = MF.CreateMachineInstr(
- TII.get(AMDIL::ADD_i32), MI.getDebugLoc());
+ TII.get(AMDIL::ADD_INT), MI.getDebugLoc());
nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true));
nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, false));
nMI->addOperand(MachineOperand::CreateReg(AMDIL::FP, false));
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index 075bac559dd..01d3d0d3bc4 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -65,8 +65,6 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const
{
switch (opcode) {
default: return AMDGPUInstrInfo::getISAOpcode(opcode);
- case AMDIL::CUSTOM_ADD_i32:
- return AMDIL::ADD_INT;
case AMDIL::IEQ:
return AMDIL::SETE_INT;
case AMDIL::INE:
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index c8db9380571..4737f1ca9f4 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -410,9 +410,8 @@ def NOT_INT : R600_1OP <
def ADD_INT : R600_2OP <
0x34, "ADD_INT",
- []>{
- let AMDILOp = AMDILInst.ADD_i32;
-}
+ [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
def SUB_INT : R600_2OP <
0x35, "SUB_INT",