summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tom Stellard <[email protected]> 2012-08-21 14:33:04 +0000
committer: Tom Stellard <[email protected]> 2012-08-21 15:42:44 +0000
commit: 6c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6 (patch)
tree: cad1029c024b87d2a26cc7c2158cab3ed8744c1a
parent: 82a5d0c64142990236b40567561b6e99b7158216 (diff)
radeon/llvm: ExpandSpecialInstrs - Add support for vector instructions
-rw-r--r-- src/gallium/drivers/radeon/R600CodeEmitter.cpp 13
-rw-r--r-- src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp 32
2 files changed, 30 insertions, 15 deletions
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
index 14e877b2518..396ae6f5054 100644
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -50,7 +50,6 @@ private:
const R600InstrInfo * TII;
bool IsCube;
- bool IsVector;
unsigned currentElement;
bool IsLast;
@@ -59,7 +58,7 @@ private:
public:
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
- _OS(OS), TM(NULL), IsCube(false), IsVector(false),
+ _OS(OS), TM(NULL), IsCube(false),
IsLast(true) { }
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
@@ -161,7 +160,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
E = MBB.instr_end(); I != E; ++I) {
MachineInstr &MI = *I;
- IsVector = TII->isVector(MI);
IsCube = TII->isCubeOp(MI.getOpcode());
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
continue;
@@ -170,7 +168,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
EmitTexInstr(MI);
} else if (TII->isFCOp(MI.getOpcode())){
EmitFCInstr(MI);
- } else if (IsVector || IsCube) {
+ } else if (IsCube) {
IsLast = false;
// XXX: On Cayman, some (all?) of the vector instructions only need
// to fill the first three slots.
@@ -178,7 +176,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
IsLast = (currentElement == 3);
EmitALUInstr(MI);
}
- IsVector = false;
IsCube = false;
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
@@ -348,7 +345,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
EmitByte(getHWReg(MO.getReg()));
// Emit the element of the destination register (1 byte)
- if (IsCube || IsVector) {
+ if (IsCube) {
EmitByte(currentElement);
} else {
EmitByte(TRI->getHWRegChan(MO.getReg()));
@@ -362,9 +359,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
}
// Emit writemask (1 byte).
- if ((IsVector &&
- currentElement != TRI->getHWRegChan(MO.getReg()))
- || MO.getTargetFlags() & MO_FLAG_MASK) {
+ if (MO.getTargetFlags() & MO_FLAG_MASK) {
EmitByte(0);
} else {
EmitByte(1);
diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
index 4c67ba47568..ba336a37467 100644
--- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
+++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
@@ -59,18 +59,38 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
I = llvm::next(I);
- if (!TII->isReductionOp(MI.getOpcode())) {
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ if (!IsReduction && !IsVector) {
continue;
}
// Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+ // T0_X = DP4 T1_X, T2_X
+ // T0_Y (write masked) = DP4 T1_Y, T2_Y
+ // T0_Z (write masked) = DP4 T1_Z, T2_Z
+ // T0_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src0 = MI.getOperand(1).getReg();
unsigned Src1 = MI.getOperand(2).getReg();
- unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
- unsigned NewSrc0 = TRI.getSubReg(Src0, SubRegIndex);
- unsigned NewSrc1 = TRI.getSubReg(Src1, SubRegIndex);
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ }
unsigned DstBase = TRI.getHWRegIndex(DstReg);
unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
@@ -80,8 +100,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode()))
.addOperand(NewDstOp)
- .addReg(NewSrc0)
- .addReg(NewSrc1)
+ .addReg(Src0)
+ .addReg(Src1)
->setIsInsideBundle(Chan != 0);
}
MI.eraseFromParent();