diff options
author | Tom Stellard <[email protected]> | 2012-07-25 08:30:32 -0400 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-07-27 17:08:08 +0000 |
commit | 50ff2dc0a4f553eb8d634d6f081fe5e4e25f6f48 (patch) | |
tree | acc1b4b0a305aff1f771399445614bf10302fd24 /src/gallium | |
parent | c424975572af2edd46863e5bb9fe3c51c96b4f9b (diff) |
radeon/llvm: Add special nodes for SALU operations on VCC
The VCC register is tricky because the SALU views it as 64-bit, but the
VALU views it as 1-bit. In order to deal with this we've added some
special bitcast and binary operations to help convert from the 64-bit
SALU view to the 1-bit VALU view and vice versa.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeon/AMDGPUISelLowering.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.cpp | 37 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstrFormats.td | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstrInfo.td | 19 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstructions.td | 18 |
6 files changed, 89 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index 3e6f9ccaced..63710920418 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -71,6 +71,16 @@ enum } // End namespace AMDGPUISD +namespace SIISD { + +enum { + SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, + VCC_AND, + VCC_BITCAST +}; + +} // End namespace SIISD + } // End namespace llvm #endif // AMDGPUISELLOWERING_H diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 23eb4f8e239..e004562244d 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -35,6 +35,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); + setOperationAction(ISD::AND, MVT::i1, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -216,9 +218,33 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); } } +/// Loweri1ContextSwitch - The function is for lowering i1 operations on the +/// VCC register. In the VALU context, VCC is a one bit register, but in the +/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only +/// the SALU can perform operations on the VCC register, we need to promote +/// the operand types from i1 to i64 in order for tablegen to be able to match +/// this operation to the correct SALU instruction. We do this promotion by +/// wrapping the operands in a CopyToReg node. +/// +SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, + SelectionDAG &DAG, + unsigned VCCNode) const +{ + DebugLoc DL = Op.getDebugLoc(); + + SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, + DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, + Op.getOperand(0)), + DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, + Op.getOperand(1))); + + return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); +} + SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); @@ -256,3 +282,14 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); } + +#define NODE_NAME_CASE(node) case SIISD::node: return #node; + +const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const +{ + switch (Opcode) { + default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); + NODE_NAME_CASE(VCC_AND) + NODE_NAME_CASE(VCC_BITCAST) + } +} diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h index f16202abcc5..10123e7b732 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ b/src/gallium/drivers/radeon/SIISelLowering.h @@ -38,6 +38,8 @@ class SITargetLowering : public AMDGPUTargetLowering void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF, MachineRegisterInfo & MRI) const; + SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, + unsigned VCCNode) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; @@ -47,6 +49,7 @@ public: MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + virtual const char* getTargetNodeName(unsigned Opcode) const; }; } // End namespace llvm diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td index 79f47087ce7..3d9d47489d7 100644 --- a/src/gallium/drivers/radeon/SIInstrFormats.td +++ b/src/gallium/drivers/radeon/SIInstrFormats.td @@ -41,6 +41,9 @@ class SOP2_32 <bits<7> op, string opName, list<dag> pattern> class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; +class SOP2_VCC <bits<7> op, string opName, list<dag> pattern> + : SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; + class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, string opName, list<dag> pattern> : VOP1 < diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td index aac644369d9..72f03d29315 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ b/src/gallium/drivers/radeon/SIInstrInfo.td @@ -7,7 +7,26 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// SI DAG Profiles +//===----------------------------------------------------------------------===// +def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ + SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> +]>; + +//===----------------------------------------------------------------------===// +// SI DAG Nodes +//===----------------------------------------------------------------------===// + +// and operation on 64-bit wide vcc +def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, + [SDNPCommutative, SDNPAssociative] +>; +// Special bitcast node for sharing VCC register between VALU and SALU +def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> +>; class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : AMDGPUInst<outs, ins, asm, pattern> { diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 1fb5df10cee..01fb81e948e 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -745,7 +745,13 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; def S_CSELECT_B32 : SOP2_32 <0x0000000a, "S_CSELECT_B32", []>; def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; -def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", []>; + +def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", + [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] +>; +def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", + [(set VCCReg:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] +>; def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; @@ -917,6 +923,16 @@ def : BitConvert <i32, f32, VReg_32>; def : BitConvert <f32, i32, SReg_32>; def : BitConvert <f32, i32, VReg_32>; +def : Pat < + (i64 (SIvcc_bitcast VCCReg:$vcc)), + (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64)) +>; + +def : Pat < + (i1 (SIvcc_bitcast SReg_64:$vcc)), + (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) +>; + /* def : Pat< (int_SI_vs_load_buffer_index), |