diff options
author | Tom Stellard <[email protected]> | 2012-08-29 10:33:58 -0400 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-08-29 15:52:10 -0400 |
commit | 05113fd2662eeb0d17fd1074001b7405eeeca43c (patch) | |
tree | 8f5701f3e526c1922a2f424e04b941063880425d /src/gallium/drivers/radeon | |
parent | 733c28a0d95c1da87b14ef893f8a59b1f940322a (diff) |
radeon/llvm: Create a register class for the M0 register
The Common Subexpression Elimination pass will not operate on
instructions with physical register defs, so we end up with
several redundant copies to M0 when using interpolation.
Adding a register class that only contains the M0 register allows
us to use a virtual register to represent M0, and makes it possible
for the Common Subexpression Elimination pass to remove the extra
copies.
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r-- | src/gallium/drivers/radeon/SIGenRegisterInfo.pl | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.cpp | 20 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstrInfo.td | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstructions.td | 15 |
5 files changed, 24 insertions, 16 deletions
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl index dd0efcc462f..84f677ed324 100644 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl @@ -171,6 +171,8 @@ def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; +def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; + STRING diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index c917b26ba13..d2fd995ad01 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -127,7 +127,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( LowerSI_INTERP(MI, *BB, I, MRI); break; case AMDGPU::SI_INTERP_CONST: - LowerSI_INTERP_CONST(MI, *BB, I); + LowerSI_INTERP_CONST(MI, *BB, I, MRI); break; case AMDGPU::SI_KIL: LowerSI_KIL(MI, *BB, I, MRI); @@ -150,6 +150,7 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); MachineOperand dst = MI->getOperand(0); MachineOperand iReg = MI->getOperand(1); MachineOperand jReg = MI->getOperand(2); @@ -157,39 +158,44 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MachineOperand attr = MI->getOperand(4); MachineOperand params = MI->getOperand(5); - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) .addOperand(params); BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) .addOperand(iReg) 
.addOperand(attr_chan) - .addOperand(attr); + .addOperand(attr) + .addReg(M0); BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) .addOperand(dst) .addReg(tmp) .addOperand(jReg) .addOperand(attr_chan) - .addOperand(attr); + .addOperand(attr) + .addReg(M0); MI->eraseFromParent(); } void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, - MachineBasicBlock &BB, MachineBasicBlock::iterator I) const + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineRegisterInfo &MRI) const { MachineOperand dst = MI->getOperand(0); MachineOperand attr_chan = MI->getOperand(1); MachineOperand attr = MI->getOperand(2); MachineOperand params = MI->getOperand(3); + unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) .addOperand(params); BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) .addOperand(dst) .addOperand(attr_chan) - .addOperand(attr); + .addOperand(attr) + .addReg(M0); MI->eraseFromParent(); } diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h index 9609311e814..77d61d86f49 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ b/src/gallium/drivers/radeon/SIISelLowering.h @@ -32,7 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td 
b/src/gallium/drivers/radeon/SIInstrInfo.td index 55882808659..a7ce9a6ec84 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ b/src/gallium/drivers/radeon/SIInstrInfo.td @@ -363,7 +363,6 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> : let Inst{31-26} = 0x32; // encoding let EncodingType = 11; // SIInstrEncodingType::VINTRP - let Uses = [M0]; let neverHasSideEffects = 1; } diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 8bc311b08a5..5d4deaa4788 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -566,33 +566,34 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; def V_INTERP_P1_F32 : VINTRP < 0x00000000, (outs VReg_32:$dst), - (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr), + (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), "V_INTERP_P1_F32", - [] ->; + []> { + let DisableEncoding = "$m0"; +} def V_INTERP_P2_F32 : VINTRP < 0x00000001, (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr), + (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), "V_INTERP_P2_F32", []> { let Constraints = "$src0 = $dst"; - let DisableEncoding = "$src0"; + let DisableEncoding = "$src0,$m0"; } def V_INTERP_MOV_F32 : VINTRP < 0x00000002, (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr), + (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), "V_INTERP_MOV_F32", []> { let VSRC = 0; + let DisableEncoding = "$m0"; } -//def V_INTERP_MOV_F32 : VINTRP_32 <0x00000002, "V_INTERP_MOV_F32", []>; //def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; let isTerminator = 1 in { |