summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp')
-rw-r--r--src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp129
1 files changed, 129 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
index 69ab0ffee8c..d6184e55302 100644
--- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
+++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
@@ -15,6 +15,7 @@
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -29,6 +30,9 @@ private:
static char ID;
const R600InstrInfo *TII;
+ bool ExpandInputPerspective(MachineInstr& MI);
+ bool ExpandInputConstant(MachineInstr& MI);
+
public:
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
@@ -48,6 +52,126 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
return new R600ExpandSpecialInstrsPass(TM);
}
+/// Expand an AMDGPU::input_perspective pseudo instruction into eight
+/// interpolation instructions: one INTERP_ZW group followed by one
+/// INTERP_XY group, all inserted in front of \p MI.
+///
+/// Operands read from \p MI:
+///   0 - destination register; its sel_x/y/z/w sub-registers receive the
+///       results,
+///   1 - immediate choosing the IJ pair: 0 = perspective, 1 = linear,
+///   2 - immediate multiplied by 4 to index into R600_TReg32 for the
+///       register being read.
+///
+/// \param MI instruction to examine; erased from its block when expanded.
+/// \returns true when \p MI was an input_perspective and has been replaced,
+///          false when it has another opcode and was left untouched.
+bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
+{
+  const R600RegisterInfo &TRI = TII->getRegisterInfo();
+  if (MI.getOpcode() != AMDGPU::input_perspective)
+    return false;
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineBasicBlock::iterator I = &MI;
+  unsigned DstReg = MI.getOperand(0).getReg();
+  R600MachineFunctionInfo *MFI = MBB.getParent()
+      ->getInfo<R600MachineFunctionInfo>();
+
+  // Initialised so that a build with assertions compiled out never computes
+  // a register index from an indeterminate value if the default case below
+  // is reached.
+  unsigned IJIndexBase = 0;
+
+  // In Evergreen ISA doc section 8.3.2 :
+  // We need to interpolate XY and ZW in two different instruction groups.
+  // An INTERP_* must occupy all 4 slots of an instruction group.
+  // Output of INTERP_XY is written in X,Y slots
+  // Output of INTERP_ZW is written in Z,W slots
+  //
+  // Thus interpolation requires the following sequences :
+  //
+  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
+  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
+  // DstGPR.z = INTERP_ZW;
+  // DstGPR.w = INTERP_ZW; (End of first IG)
+  // DstGPR.x = INTERP_XY;
+  // DstGPR.y = INTERP_XY;
+  // AnyGPR.z = INTERP_XY; (Write Masked Out)
+  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
+  //
+  switch (MI.getOperand(1).getImm()) {
+  case 0:
+    IJIndexBase = MFI->GetIJPerspectiveIndex();
+    break;
+  case 1:
+    IJIndexBase = MFI->GetIJLinearIndex();
+    break;
+  default:
+    assert(0 && "Unknow ij index");
+  }
+
+  // Sub-register selector for each of the four slots of a group.
+  static const unsigned ChannelSel[4] = {
+    AMDGPU::sel_x, AMDGPU::sel_y, AMDGPU::sel_z, AMDGPU::sel_w
+  };
+
+  // The register being read is the same for all eight instructions.
+  unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
+      4 * MI.getOperand(2).getImm());
+
+  for (unsigned i = 0; i < 8; i++) {
+    // Alternates between the two registers of the selected IJ pair:
+    // offset 1 on even i, offset 0 on odd i.
+    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
+        2 * IJIndexBase + ((i + 1) % 2));
+
+    unsigned Res = TRI.getSubReg(DstReg, ChannelSel[i % 4]);
+
+    // Slots 0-3 form the INTERP_ZW group, slots 4-7 the INTERP_XY group.
+    const MCInstrDesc &Opcode = (i < 4) ?
+        TII->get(AMDGPU::INTERP_ZW) :
+        TII->get(AMDGPU::INTERP_XY);
+
+    MachineInstr *NewMI = BuildMI(MBB, I, MBB.findDebugLoc(I), Opcode, Res)
+        .addReg(IJIndex)
+        .addReg(ReadReg)
+        .addImm(0);
+
+    // Only slots 2..5 (ZW.z, ZW.w, XY.x, XY.y) carry results that are kept;
+    // every other slot has its write masked out.
+    if (i < 2 || i > 5)
+      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+
+    // Every slot except the fourth of a group is flagged as not-last.
+    if (i % 4 != 3)
+      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+  }
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
+/// Expand an AMDGPU::input_constant pseudo instruction into four
+/// INTERP_LOAD_P0 instructions, one per sub-register (sel_x, sel_y, sel_z,
+/// sel_w) of the destination, inserted in front of \p MI.
+///
+/// Operand 0 of \p MI is the destination register. Operand 1 is an
+/// immediate; 4 * immediate + channel indexes into R600_TReg32 to pick the
+/// register read for each channel.
+///
+/// \param MI instruction to examine; erased from its block when expanded.
+/// \returns true when \p MI was an input_constant and has been replaced,
+///          false when it has another opcode and was left untouched.
+bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
+{
+  const R600RegisterInfo &TRI = TII->getRegisterInfo();
+  if (MI.getOpcode() != AMDGPU::input_constant)
+    return false;
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineBasicBlock::iterator InsertPt = &MI;
+  unsigned DstReg = MI.getOperand(0).getReg();
+
+  // Sub-register selector for each channel, in emission order.
+  static const unsigned ChannelSel[4] = {
+    AMDGPU::sel_x, AMDGPU::sel_y, AMDGPU::sel_z, AMDGPU::sel_w
+  };
+
+  for (unsigned Chan = 0; Chan < 4; Chan++) {
+    unsigned SrcReg = AMDGPU::R600_TReg32RegClass.getRegister(
+        4 * MI.getOperand(1).getImm() + Chan);
+    unsigned DstChan = TRI.getSubReg(DstReg, ChannelSel[Chan]);
+
+    MachineInstr *Load = BuildMI(MBB, InsertPt, MBB.findDebugLoc(InsertPt),
+                                 TII->get(AMDGPU::INTERP_LOAD_P0), DstChan)
+        .addReg(SrcReg)
+        .addImm(0);
+
+    // All but the final (w) load are flagged as not-last.
+    if (Chan != 3)
+      TII->addFlag(Load, 0, MO_FLAG_NOT_LAST);
+  }
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
@@ -59,6 +183,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
while (I != MBB.end()) {
MachineInstr &MI = *I;
I = llvm::next(I);
+
+ if (ExpandInputPerspective(MI))
+ continue;
+ if (ExpandInputConstant(MI))
+ continue;
bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);