From d784bc77405012b442ae9d68f200e9d115030b3c Mon Sep 17 00:00:00 2001 From: Tom Stellard <thomas.stellard@amd.com> Date: Fri, 25 May 2012 10:50:35 -0400 Subject: radeon/llvm: Use a custom inserter to lower CLAMP --- src/gallium/drivers/radeon/AMDGPUInstructions.td | 17 +++++++++ src/gallium/drivers/radeon/AMDILInstructions.td | 1 - src/gallium/drivers/radeon/R600ISelLowering.cpp | 7 ++++ src/gallium/drivers/radeon/R600Instructions.td | 11 +----- .../drivers/radeon/R600LowerInstructions.cpp | 27 -------------- src/gallium/drivers/radeon/SIISelLowering.cpp | 15 ++++++++ src/gallium/drivers/radeon/SIInstrInfo.cpp | 43 ---------------------- src/gallium/drivers/radeon/SIInstrInfo.h | 3 -- src/gallium/drivers/radeon/SIInstructions.td | 1 + 9 files changed, 41 insertions(+), 84 deletions(-) (limited to 'src/gallium/drivers/radeon') diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td index e1ace508e88..1f0d582d82b 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td @@ -39,6 +39,16 @@ int TWO_PI_INV = 0x3e22f983; } def CONST : Constants; +def FP_ZERO : PatLeaf < + (fpimm), + [{return N->getValueAPF().isZero();}] +>; + +def FP_ONE : PatLeaf < + (fpimm), + [{return N->isExactlyValue(1.0);}] +>; + let isCodeGenOnly = 1 in { def MASK_WRITE : AMDGPUShaderInst < @@ -50,6 +60,13 @@ let isCodeGenOnly = 1 in { let isPseudo = 1, usesCustomInserter = 1 in { +class CLAMP <RegisterClass rc> : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "CLAMP $dst, $src0", + [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] +>; + class FABS <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td index 6f1f4d55ca9..869c2bb6af2 100644 --- a/src/gallium/drivers/radeon/AMDILInstructions.td +++ b/src/gallium/drivers/radeon/AMDILInstructions.td @@ -230,7 +230,6 @@ defm DIV : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>; } } let mayLoad = 0, mayStore=0 in { -defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>; defm FMA : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>; defm LERP : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>; } diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 4d789cb22b6..59a2bb1cb28 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -101,6 +101,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( lowerImplicitParameter(MI, *BB, MRI, 8); break; + case AMDIL::CLAMP_R600: + MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + break; + case AMDIL::FABS_R600: MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::MOV)) diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index cb9a7bba3d6..22f3fc1b780 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -119,16 +119,6 @@ def TEX_SHADOW : PatLeaf< }] >; -def FP_ZERO : PatLeaf < - (fpimm), - [{return N->getValueAPF().isZero();}] ->; - -def FP_ONE : PatLeaf < - (fpimm), - [{return N->isExactlyValue(1.0);}] ->; - def COND_EQ : PatLeaf < (cond), [{switch(N->get()){{default: return false; @@ -1078,6 +1068,7 @@ def TXD_SHADOW: AMDGPUShaderInst < } // End isCodeGenOnly = 1 +def CLAMP_R600 : CLAMP <R600_Reg32>; def FABS_R600 : FABS<R600_Reg32>; let isPseudo = 1 in { diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index 3a1a12e635f..1795b38dfb6 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -83,23 +83,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) .addOperand(MI.getOperand(1)); break; - case AMDIL::CLAMP_f32: - { - MachineOperand lowOp = MI.getOperand(2); - MachineOperand highOp = MI.getOperand(3); - if (lowOp.isReg() && highOp.isReg() - && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) { - MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV)) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)); - } else { - /* XXX: Handle other cases */ - abort(); - } - break; - } - /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */ /* case AMDIL::DIV_INF_f32: { @@ -218,16 +201,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) } if (canInline) { - MachineOperand * use = dstOp.getNextOperandForReg(); - /* The lowering operation for CLAMP needs to have the immediates - * as operands, so we must propagate them. */ - while (use) { - MachineOperand * next = use->getNextOperandForReg(); - if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) { - use->setReg(inlineReg); - } - use = next; - } BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY)) .addOperand(dstOp) .addReg(inlineReg); diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index e192af091b6..2455b536f9f 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -46,6 +46,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case AMDIL::CLAMP_SI: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + /* VSRC1-2 are unused, but we still need to fill all the + * operand slots, so we just reuse the VSRC0 operand */ + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(1)) + .addImm(0) // ABS + .addImm(1) // CLAMP + .addImm(0) // OMOD + .addImm(0); // NEG + MI->eraseFromParent(); + break; + case AMDIL::FABS_SI: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::V_MOV_B32_e64)) .addOperand(MI->getOperand(0)) diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp index df2cd9bb023..4ee3e5d5f8d 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.cpp +++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp @@ -80,12 +80,6 @@ unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const { - - switch (MI.getOpcode()) { - default: break; - case AMDIL::CLAMP_f32: return convertCLAMP_f32(MI, MF, DL); - } - MachineInstr * newMI = AMDGPUInstrInfo::convertToISA(MI, MF, DL); const MCInstrDesc &newDesc = get(newMI->getOpcode()); @@ -111,40 +105,3 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const default: return AMDILopcode; } } - -MachineInstr * SIInstrInfo::convertCLAMP_f32(MachineInstr & clampInstr, - MachineFunction &MF, DebugLoc DL) const -{ - MachineRegisterInfo &MRI = MF.getRegInfo(); - /* XXX: HACK assume that low == zero and high == one for now until - * we have a way to propogate the immediates. */ - -/* - uint32_t zero = (uint32_t)APFloat(0.0f).bitcastToAPInt().getZExtValue(); - uint32_t one = (uint32_t)APFloat(1.0f).bitcastToAPInt().getZExtValue(); - uint32_t low = clampInstr.getOperand(2).getImm(); - uint32_t high = clampInstr.getOperand(3).getImm(); -*/ -// if (low == zero && high == one) { - - /* Convert the desination register to the VReg_32 class */ - if (TargetRegisterInfo::isVirtualRegister(clampInstr.getOperand(0).getReg())) { - MRI.setRegClass(clampInstr.getOperand(0).getReg(), - AMDIL::VReg_32RegisterClass); - } - return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64)) - .addOperand(clampInstr.getOperand(0)) - .addOperand(clampInstr.getOperand(1)) - /* VSRC1-2 are unused, but we still need to fill all the - * operand slots, so we just reuse the VSRC0 operand */ - .addOperand(clampInstr.getOperand(1)) - .addOperand(clampInstr.getOperand(1)) - .addImm(0) // ABS - .addImm(1) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG -// } else { - /* XXX: Handle other cases */ -// abort(); -// } -} diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h index 1d137d4efd4..0614638517a 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.h +++ b/src/gallium/drivers/radeon/SIInstrInfo.h @@ -25,9 +25,6 @@ private: const SIRegisterInfo RI; AMDGPUTargetMachine &TM; - MachineInstr * convertCLAMP_f32(MachineInstr & clampInstr, - MachineFunction &MF, DebugLoc DL) const; - public: explicit SIInstrInfo(AMDGPUTargetMachine &tm); diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 87034684f0b..b6097ef1eeb 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -907,6 +907,7 @@ def : Pat < (S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */ >; +def CLAMP_SI : CLAMP<VReg_32>; def FABS_SI : FABS<VReg_32>; def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; -- cgit v1.2.3