summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeon
diff options
context:
space:
mode:
author Tom Stellard <[email protected]> 2012-08-29 10:33:58 -0400
committer Tom Stellard <[email protected]> 2012-08-29 15:52:10 -0400
commit 05113fd2662eeb0d17fd1074001b7405eeeca43c (patch)
tree 8f5701f3e526c1922a2f424e04b941063880425d /src/gallium/drivers/radeon
parent 733c28a0d95c1da87b14ef893f8a59b1f940322a (diff)
radeon/llvm: Create a register class for the M0 register
The Common Subexpression Elimination pass will not operate on instructions with physical register defs, so we end up with several redundant copies to M0 when using interpolation. Adding a register class that only contains the M0 register allows us to use a virtual register to represent M0, and makes it possible for the Common Subexpression Elimination pass to remove the extra copies.
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r-- src/gallium/drivers/radeon/SIGenRegisterInfo.pl 2
-rw-r--r-- src/gallium/drivers/radeon/SIISelLowering.cpp 20
-rw-r--r-- src/gallium/drivers/radeon/SIISelLowering.h 2
-rw-r--r-- src/gallium/drivers/radeon/SIInstrInfo.td 1
-rw-r--r-- src/gallium/drivers/radeon/SIInstructions.td 15
5 files changed, 24 insertions, 16 deletions
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
index dd0efcc462f..84f677ed324 100644
--- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
@@ -171,6 +171,8 @@ def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+
STRING
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
index c917b26ba13..d2fd995ad01 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -127,7 +127,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
LowerSI_INTERP(MI, *BB, I, MRI);
break;
case AMDGPU::SI_INTERP_CONST:
- LowerSI_INTERP_CONST(MI, *BB, I);
+ LowerSI_INTERP_CONST(MI, *BB, I, MRI);
break;
case AMDGPU::SI_KIL:
LowerSI_KIL(MI, *BB, I, MRI);
@@ -150,6 +150,7 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
MachineOperand dst = MI->getOperand(0);
MachineOperand iReg = MI->getOperand(1);
MachineOperand jReg = MI->getOperand(2);
@@ -157,39 +158,44 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineOperand attr = MI->getOperand(4);
MachineOperand params = MI->getOperand(5);
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
.addOperand(iReg)
.addOperand(attr_chan)
- .addOperand(attr);
+ .addOperand(attr)
+ .addReg(M0);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
.addOperand(dst)
.addReg(tmp)
.addOperand(jReg)
.addOperand(attr_chan)
- .addOperand(attr);
+ .addOperand(attr)
+ .addReg(M0);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
- MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
+ MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+ MachineRegisterInfo &MRI) const
{
MachineOperand dst = MI->getOperand(0);
MachineOperand attr_chan = MI->getOperand(1);
MachineOperand attr = MI->getOperand(2);
MachineOperand params = MI->getOperand(3);
+ unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
.addOperand(dst)
.addOperand(attr_chan)
- .addOperand(attr);
+ .addOperand(attr)
+ .addReg(M0);
MI->eraseFromParent();
}
diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h
index 9609311e814..77d61d86f49 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.h
+++ b/src/gallium/drivers/radeon/SIISelLowering.h
@@ -32,7 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td
index 55882808659..a7ce9a6ec84 100644
--- a/src/gallium/drivers/radeon/SIInstrInfo.td
+++ b/src/gallium/drivers/radeon/SIInstrInfo.td
@@ -363,7 +363,6 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{31-26} = 0x32; // encoding
let EncodingType = 11; // SIInstrEncodingType::VINTRP
- let Uses = [M0];
let neverHasSideEffects = 1;
}
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index 8bc311b08a5..5d4deaa4788 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -566,33 +566,34 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
- (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr),
+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_P1_F32",
- []
->;
+ []> {
+ let DisableEncoding = "$m0";
+}
def V_INTERP_P2_F32 : VINTRP <
0x00000001,
(outs VReg_32:$dst),
- (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr),
+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_P2_F32",
[]> {
let Constraints = "$src0 = $dst";
- let DisableEncoding = "$src0";
+ let DisableEncoding = "$src0,$m0";
}
def V_INTERP_MOV_F32 : VINTRP <
0x00000002,
(outs VReg_32:$dst),
- (ins i32imm:$attr_chan, i32imm:$attr),
+ (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_MOV_F32",
[]> {
let VSRC = 0;
+ let DisableEncoding = "$m0";
}
-//def V_INTERP_MOV_F32 : VINTRP_32 <0x00000002, "V_INTERP_MOV_F32", []>;
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
let isTerminator = 1 in {