summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tom Stellard <[email protected]> 2012-05-25 10:50:35 -0400
committer: Tom Stellard <[email protected]> 2012-05-25 15:40:58 -0400
commit: d784bc77405012b442ae9d68f200e9d115030b3c (patch)
tree: 7d8b857f99d40f7e26e7f597385c53a6f7c227ee
parent: 17f852892346fdf3b1e9eec56b7a55c470279bc8 (diff)
radeon/llvm: Use a custom inserter to lower CLAMP
-rw-r--r-- src/gallium/drivers/radeon/AMDGPUInstructions.td 17
-rw-r--r-- src/gallium/drivers/radeon/AMDILInstructions.td 1
-rw-r--r-- src/gallium/drivers/radeon/R600ISelLowering.cpp 7
-rw-r--r-- src/gallium/drivers/radeon/R600Instructions.td 11
-rw-r--r-- src/gallium/drivers/radeon/R600LowerInstructions.cpp 27
-rw-r--r-- src/gallium/drivers/radeon/SIISelLowering.cpp 15
-rw-r--r-- src/gallium/drivers/radeon/SIInstrInfo.cpp 43
-rw-r--r-- src/gallium/drivers/radeon/SIInstrInfo.h 3
-rw-r--r-- src/gallium/drivers/radeon/SIInstructions.td 1
9 files changed, 41 insertions, 84 deletions
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
index e1ace508e88..1f0d582d82b 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -39,6 +39,16 @@ int TWO_PI_INV = 0x3e22f983;
}
def CONST : Constants;
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
let isCodeGenOnly = 1 in {
def MASK_WRITE : AMDGPUShaderInst <
@@ -50,6 +60,13 @@ let isCodeGenOnly = 1 in {
let isPseudo = 1, usesCustomInserter = 1 in {
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td
index 6f1f4d55ca9..869c2bb6af2 100644
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -230,7 +230,6 @@ defm DIV : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>;
}
}
let mayLoad = 0, mayStore=0 in {
-defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
defm FMA : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
defm LERP : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
}
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index 4d789cb22b6..59a2bb1cb28 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -101,6 +101,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
lowerImplicitParameter(MI, *BB, MRI, 8);
break;
+ case AMDIL::CLAMP_R600:
+ MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ break;
+
case AMDIL::FABS_R600:
MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV))
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index cb9a7bba3d6..22f3fc1b780 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -119,16 +119,6 @@ def TEX_SHADOW : PatLeaf<
}]
>;
-def FP_ZERO : PatLeaf <
- (fpimm),
- [{return N->getValueAPF().isZero();}]
->;
-
-def FP_ONE : PatLeaf <
- (fpimm),
- [{return N->isExactlyValue(1.0);}]
->;
-
def COND_EQ : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
@@ -1078,6 +1068,7 @@ def TXD_SHADOW: AMDGPUShaderInst <
} // End isCodeGenOnly = 1
+def CLAMP_R600 : CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
let isPseudo = 1 in {
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
index 3a1a12e635f..1795b38dfb6 100644
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -83,23 +83,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
.addOperand(MI.getOperand(1));
break;
- case AMDIL::CLAMP_f32:
- {
- MachineOperand lowOp = MI.getOperand(2);
- MachineOperand highOp = MI.getOperand(3);
- if (lowOp.isReg() && highOp.isReg()
- && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) {
- MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1));
- } else {
- /* XXX: Handle other cases */
- abort();
- }
- break;
- }
-
/* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
/* case AMDIL::DIV_INF_f32:
{
@@ -218,16 +201,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
}
if (canInline) {
- MachineOperand * use = dstOp.getNextOperandForReg();
- /* The lowering operation for CLAMP needs to have the immediates
- * as operands, so we must propagate them. */
- while (use) {
- MachineOperand * next = use->getNextOperandForReg();
- if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) {
- use->setReg(inlineReg);
- }
- use = next;
- }
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
.addOperand(dstOp)
.addReg(inlineReg);
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
index e192af091b6..2455b536f9f 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -46,6 +46,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDIL::CLAMP_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ /* VSRC1-2 are unused, but we still need to fill all the
+ * operand slots, so we just reuse the VSRC0 operand */
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // ABS
+ .addImm(1) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+ MI->eraseFromParent();
+ break;
+
case AMDIL::FABS_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp
index df2cd9bb023..4ee3e5d5f8d 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@@ -80,12 +80,6 @@ unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const
{
-
- switch (MI.getOpcode()) {
- default: break;
- case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL);
- }
-
MachineInstr * newMI = AMDGPUInstrInfo::convertToISA(MI, MF, DL);
const MCInstrDesc &newDesc = get(newMI->getOpcode());
@@ -111,40 +105,3 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
default: return AMDILopcode;
}
}
-
-MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr,
- MachineFunction &MF, DebugLoc DL) const
-{
- MachineRegisterInfo &MRI = MF.getRegInfo();
- /* XXX: HACK assume that low == zero and high == one for now until
- * we have a way to propogate the immediates. */
-
-/*
- uint32_t zero = (uint32_t)APFloat(0.0f).bitcastToAPInt().getZExtValue();
- uint32_t one = (uint32_t)APFloat(1.0f).bitcastToAPInt().getZExtValue();
- uint32_t low = clampInstr.getOperand(2).getImm();
- uint32_t high = clampInstr.getOperand(3).getImm();
-*/
-// if (low == zero && high == one) {
-
- /* Convert the desination register to the VReg_32 class */
- if (TargetRegisterInfo::isVirtualRegister(clampInstr.getOperand(0).getReg())) {
- MRI.setRegClass(clampInstr.getOperand(0).getReg(),
- AMDIL::VReg_32RegisterClass);
- }
- return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64))
- .addOperand(clampInstr.getOperand(0))
- .addOperand(clampInstr.getOperand(1))
- /* VSRC1-2 are unused, but we still need to fill all the
- * operand slots, so we just reuse the VSRC0 operand */
- .addOperand(clampInstr.getOperand(1))
- .addOperand(clampInstr.getOperand(1))
- .addImm(0) // ABS
- .addImm(1) // CLAMP
- .addImm(0) // OMOD
- .addImm(0); // NEG
-// } else {
- /* XXX: Handle other cases */
-// abort();
-// }
-}
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h
index 1d137d4efd4..0614638517a 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@@ -25,9 +25,6 @@ private:
const SIRegisterInfo RI;
AMDGPUTargetMachine &TM;
- MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr,
- MachineFunction &MF, DebugLoc DL) const;
-
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index 87034684f0b..b6097ef1eeb 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -907,6 +907,7 @@ def : Pat <
(S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */
>;
+def CLAMP_SI : CLAMP<VReg_32>;
def FABS_SI : FABS<VReg_32>;
def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;