From ff947c6d65830b7be6e9fcbfe666fa7dba6341f6 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 24 Sep 2012 16:04:26 +0200 Subject: radeon/llvm: improve select_cc lowering to generate CND* more often v2: - Simplify isZero() - Remove an unused function prototype - Clean whitespace trails Reviewed-by: Tom Stellard --- src/gallium/drivers/radeon/R600ISelLowering.cpp | 89 +++++++++++++++---------- src/gallium/drivers/radeon/R600ISelLowering.h | 2 + src/gallium/drivers/radeon/R600Instructions.td | 38 +++++++++-- 3 files changed, 88 insertions(+), 41 deletions(-) (limited to 'src/gallium/drivers/radeon') diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 2fc9c6708ef..5dd2f5334c5 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -516,6 +516,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1))); } +bool R600TargetLowering::isZero(SDValue Op) const +{ + if(ConstantSDNode *Cst = dyn_cast(Op)) { + return Cst->isNullValue(); + } else if(ConstantFPSDNode *CstFP = dyn_cast(Op)){ + return CstFP->isZero(); + } else { + return false; + } +} + SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); @@ -568,47 +579,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const if (isHWTrueValue(False) && isHWFalseValue(True)) { } - // XXX Check if we can lower this to a SELECT or if it is supported by a native - // operation. (The code below does this but we don't have the Instruction - // selection patterns to do this yet. -#if 0 + // Check if we can lower this to a native operation. + // CND* instructions require all operands to have the same type, + // and RHS to be zero. + if (isZero(LHS) || isZero(RHS)) { SDValue Cond = (isZero(LHS) ? RHS : LHS); - bool SwapTF = false; + SDValue Zero = (isZero(LHS) ? 
LHS : RHS); + ISD::CondCode CCOpcode = cast(CC)->get(); + if (CompareVT != VT) { + True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); + False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); + } + if (isZero(LHS)) { + CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); + } + switch (CCOpcode) { - case ISD::SETOEQ: - case ISD::SETUEQ: - case ISD::SETEQ: - SwapTF = true; - // Fall through case ISD::SETONE: case ISD::SETUNE: case ISD::SETNE: - // We can lower to select - if (SwapTF) { - Temp = True; - True = False; - False = Temp; - } - // CNDE - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); + case ISD::SETULE: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETOLT: + case ISD::SETLE: + case ISD::SETLT: + CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); + Temp = True; + True = False; + False = Temp; + break; default: - // Supported by a native operation (CNDGE, CNDGT) - return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + break; } + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + Cond, Zero, + True, False, + DAG.getCondCode(CCOpcode)); + return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); } -#endif + // If we make it this for it means we have no native instructions to handle // this SELECT_CC, so we must lower it. 
SDValue HWTrue, HWFalse; - if (VT == MVT::f32) { - HWTrue = DAG.getConstantFP(1.0f, VT); - HWFalse = DAG.getConstantFP(0.0f, VT); - } else if (VT == MVT::i32) { - HWTrue = DAG.getConstant(-1, VT); - HWFalse = DAG.getConstant(0, VT); + if (CompareVT == MVT::f32) { + HWTrue = DAG.getConstantFP(1.0f, CompareVT); + HWFalse = DAG.getConstantFP(0.0f, CompareVT); + } else if (CompareVT == MVT::i32) { + HWTrue = DAG.getConstant(-1, CompareVT); + HWFalse = DAG.getConstant(0, CompareVT); } else { assert(!"Unhandled value type in LowerSELECT_CC"); @@ -616,15 +638,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const // Lower this unsupported SELECT_CC into a combination of two supported // SELECT_CC operations. - SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC); - - // Convert floating point condition to i1 - if (VT == MVT::f32) { - Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, - DAG.getNode(ISD::FNEG, DL, VT, Cond)); - } + SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); + return DAG.getNode(ISD::SELECT_CC, DL, VT, + Cond, HWFalse, + True, False, + DAG.getCondCode(ISD::SETNE)); } SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h index 7b9c27ee12e..7df2dd13787 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.h +++ b/src/gallium/drivers/radeon/R600ISelLowering.h @@ -60,6 +60,8 @@ private: SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; + + bool isZero(SDValue Op) const; }; } // End namespace llvm; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index feb97fac822..1689a2f4ab8 100644 
--- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -545,7 +545,25 @@ def SETGE_UINT : R600_2OP < def CNDE_INT : R600_3OP < 0x1C, "CNDE_INT", [(set (i32 R600_Reg32:$dst), - (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_EQ))] +>; + +def CNDGE_INT : R600_3OP < + 0x1E, "CNDGE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_GE))] +>; + +def CNDGT_INT : R600_3OP < + 0x1D, "CNDGT_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_GT))] >; //===----------------------------------------------------------------------===// @@ -642,18 +660,26 @@ class MULADD_Common inst> : R600_3OP < class CNDE_Common inst> : R600_3OP < inst, "CNDE", - [(set (f32 R600_Reg32:$dst), - (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_EQ))] >; class CNDGT_Common inst> : R600_3OP < inst, "CNDGT", - [] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_GT))] >; - + class CNDGE_Common inst> : R600_3OP < inst, "CNDGE", - [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_GE))] >; class DOT4_Common inst> : R600_REDUCTION < -- cgit v1.2.3