From d784bc77405012b442ae9d68f200e9d115030b3c Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Fri, 25 May 2012 10:50:35 -0400
Subject: radeon/llvm: Use a custom inserter to lower CLAMP

---
 src/gallium/drivers/radeon/AMDGPUInstructions.td   | 17 +++++++++
 src/gallium/drivers/radeon/AMDILInstructions.td    |  1 -
 src/gallium/drivers/radeon/R600ISelLowering.cpp    |  7 ++++
 src/gallium/drivers/radeon/R600Instructions.td     | 11 +-----
 .../drivers/radeon/R600LowerInstructions.cpp       | 27 --------------
 src/gallium/drivers/radeon/SIISelLowering.cpp      | 15 ++++++++
 src/gallium/drivers/radeon/SIInstrInfo.cpp         | 43 ----------------------
 src/gallium/drivers/radeon/SIInstrInfo.h           |  3 --
 src/gallium/drivers/radeon/SIInstructions.td       |  1 +
 9 files changed, 41 insertions(+), 84 deletions(-)

(limited to 'src/gallium/drivers/radeon')

diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
index e1ace508e88..1f0d582d82b 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -39,6 +39,16 @@ int TWO_PI_INV = 0x3e22f983;
 }
 def CONST : Constants;
 
+def FP_ZERO : PatLeaf <  // Leaf that matches an fpimm node whose value is zero.
+  (fpimm),
+  [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <  // Leaf that matches an fpimm node whose value is exactly 1.0.
+  (fpimm),
+  [{return N->isExactlyValue(1.0);}]
+>;
+
 let isCodeGenOnly = 1 in {
 
   def MASK_WRITE : AMDGPUShaderInst <
@@ -50,6 +60,13 @@ let isCodeGenOnly = 1 in {
 
 let isPseudo = 1, usesCustomInserter = 1  in {
 
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <  // Pseudo with one result and one source, both in register class rc.
+  (outs rc:$dst),
+  (ins rc:$src0),
+  "CLAMP $dst, $src0",
+  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]  // Selected only when the two bound operands are the immediates 0.0 and 1.0.
+>;
+
 class FABS <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td
index 6f1f4d55ca9..869c2bb6af2 100644
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -230,7 +230,6 @@ defm DIV  : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>;
   }
 }
   let mayLoad = 0, mayStore=0 in {
-defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
 defm FMA  : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
 defm LERP  : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
   }
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index 4d789cb22b6..59a2bb1cb28 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -101,6 +101,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
     lowerImplicitParameter(MI, *BB, MRI, 8);
     break;
 
+  case AMDIL::CLAMP_R600: // Replace the CLAMP pseudo with a MOV whose operand 0 carries MO_FLAG_CLAMP.
+    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP); // Set the flag before operand 0 is added to the MOV below.
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1));
+    break;
+
   case AMDIL::FABS_R600:
     MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index cb9a7bba3d6..22f3fc1b780 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -119,16 +119,6 @@ def TEX_SHADOW : PatLeaf<
   }]
 >;
 
-def FP_ZERO : PatLeaf <
-  (fpimm),
-  [{return N->getValueAPF().isZero();}]
->;
-
-def FP_ONE : PatLeaf <
-  (fpimm),
-  [{return N->isExactlyValue(1.0);}]
->;
-
 def COND_EQ : PatLeaf <
   (cond),
   [{switch(N->get()){{default: return false;
@@ -1078,6 +1068,7 @@ def TXD_SHADOW: AMDGPUShaderInst <
 
 } // End isCodeGenOnly = 1
 
+def CLAMP_R600 :  CLAMP <R600_Reg32>;  // CLAMP pseudo instantiated for the R600_Reg32 register class.
 def FABS_R600 : FABS<R600_Reg32>;
 
 let isPseudo = 1 in {
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
index 3a1a12e635f..1795b38dfb6 100644
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -83,23 +83,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
                 .addOperand(MI.getOperand(1));
         break;
 
-      case AMDIL::CLAMP_f32:
-        {
-          MachineOperand lowOp = MI.getOperand(2);
-          MachineOperand highOp = MI.getOperand(3);
-        if (lowOp.isReg() && highOp.isReg()
-            && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) {
-          MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
-                  .addOperand(MI.getOperand(0))
-                  .addOperand(MI.getOperand(1));
-        } else {
-          /* XXX: Handle other cases */
-          abort();
-        }
-        break;
-        }
-
       /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
 /*      case AMDIL::DIV_INF_f32:
         {
@@ -218,16 +201,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
           }
 
           if (canInline) {
-            MachineOperand * use = dstOp.getNextOperandForReg();
-            /* The lowering operation for CLAMP needs to have the immediates
-             * as operands, so we must propagate them. */
-            while (use) {
-              MachineOperand * next = use->getNextOperandForReg();
-              if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) {
-                use->setReg(inlineReg);
-              }
-              use = next;
-            }
             BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
                     .addOperand(dstOp)
                     .addReg(inlineReg);
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
index e192af091b6..2455b536f9f 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -46,6 +46,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
 
+  case AMDIL::CLAMP_SI: // Replace the CLAMP pseudo with V_MOV_B32_e64 whose CLAMP immediate is 1.
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
+           .addOperand(MI->getOperand(0))
+           .addOperand(MI->getOperand(1))
+          /* VSRC1-2 are unused, but every operand slot must still be
+           * filled, so operand 1 of the pseudo is reused for both. */
+           .addOperand(MI->getOperand(1))
+           .addOperand(MI->getOperand(1))
+           .addImm(0) // ABS
+           .addImm(1) // CLAMP
+           .addImm(0) // OMOD
+           .addImm(0); // NEG
+    MI->eraseFromParent(); // Remove the pseudo now that its replacement has been built.
+    break;
+
   case AMDIL::FABS_SI:
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
                  .addOperand(MI->getOperand(0))
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp
index df2cd9bb023..4ee3e5d5f8d 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@@ -80,12 +80,6 @@ unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
 MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
     DebugLoc DL) const
 {
-
-  switch (MI.getOpcode()) {
-    default: break;
-    case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL);
-  }
-
   MachineInstr * newMI = AMDGPUInstrInfo::convertToISA(MI, MF, DL);
   const MCInstrDesc &newDesc = get(newMI->getOpcode());
 
@@ -111,40 +105,3 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
   default: return AMDILopcode;
   }
 }
-
-MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr,
-    MachineFunction &MF, DebugLoc DL) const
-{
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  /* XXX: HACK assume that low == zero and high == one for now until
-   * we have a way to propogate the immediates. */
-
-/*
-  uint32_t zero = (uint32_t)APFloat(0.0f).bitcastToAPInt().getZExtValue();
-  uint32_t one = (uint32_t)APFloat(1.0f).bitcastToAPInt().getZExtValue();
-  uint32_t low = clampInstr.getOperand(2).getImm();
-  uint32_t high = clampInstr.getOperand(3).getImm();
-*/
-//  if (low == zero && high == one) {
-  
-  /* Convert the desination register to the VReg_32 class */
-  if (TargetRegisterInfo::isVirtualRegister(clampInstr.getOperand(0).getReg())) {
-    MRI.setRegClass(clampInstr.getOperand(0).getReg(),
-                    AMDIL::VReg_32RegisterClass);
-  }
-  return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64))
-           .addOperand(clampInstr.getOperand(0))
-           .addOperand(clampInstr.getOperand(1))
-          /* VSRC1-2 are unused, but we still need to fill all the
-           * operand slots, so we just reuse the VSRC0 operand */
-           .addOperand(clampInstr.getOperand(1))
-           .addOperand(clampInstr.getOperand(1))
-           .addImm(0) // ABS
-           .addImm(1) // CLAMP
-           .addImm(0) // OMOD
-           .addImm(0); // NEG
-//  } else {
-    /* XXX: Handle other cases */
-//    abort();
-//  }
-}
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h
index 1d137d4efd4..0614638517a 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@@ -25,9 +25,6 @@ private:
   const SIRegisterInfo RI;
   AMDGPUTargetMachine &TM;
 
-  MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr,
-                                  MachineFunction &MF, DebugLoc DL) const;
-
 public:
   explicit SIInstrInfo(AMDGPUTargetMachine &tm);
 
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index 87034684f0b..b6097ef1eeb 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -907,6 +907,7 @@ def : Pat <
                 (S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */
 >;
 
+def CLAMP_SI : CLAMP<VReg_32>;  // CLAMP pseudo instantiated for the VReg_32 register class.
 def FABS_SI : FABS<VReg_32>;
 
 def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
-- 
cgit v1.2.3