//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    // Expand a vec4 copy into four per-channel 32-bit MOVs.
    for (unsigned i = 0; i < 4; i++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(i);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
        .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
        .addReg(RI.getSubReg(SrcReg, SubRegIndex))
        .addReg(0) // PREDICATE_BIT
        .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {
    // We can't copy vec4 registers with a single MOV, so neither side
    // should be a vec4 register here.
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
           !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addReg(0); // PREDICATE_BIT
  }
}

MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg,
                                            int64_t Imm) const {
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
  MachineInstrBuilder(MI).addImm(Imm);
  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
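// Placeholder opcodes never become ISA instructions; passes that run after
// instruction selection are expected to recognize and eliminate them.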
bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const {
  switch (opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::LAST:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

bool R600InstrInfo::isTexOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

bool R600InstrInfo::isCubeOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isFCOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::BREAK_LOGICALZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
  case AMDGPU::BREAK_LOGICALZ_i32:
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
  case AMDGPU::IF_LOGICALZ_f32:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  case AMDGPU::ENDLOOP:
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::WHILELOOP:
    return true;
  }
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool isPredicateSetter(unsigned opcode) {
  switch (opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if (!isPredicated(LastInst)) {
        // Unconditional branch: the target is the only successor.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: recover the condition from the PRED_X
        // instruction that feeds the predicate.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
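  // With two terminators, the only pattern handled below is a predicated
  // JUMP followed by an unconditional JUMP: branch to TBB when the
  // predicate holds, otherwise branch to FBB.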
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a conditional JUMP followed by an unconditional
  // JUMP, handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  }
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      // Mark the predicate as used by a branch (bit 4 of the PRED_X
      // operand) and install the condition's comparison opcode.
      PredSet->getOperand(1).addTargetFlag(1<<4);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    PredSet->getOperand(1).addTargetFlag(1<<4);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      // Clear the "used by a branch" bit on the PRED_X operand, since the
      // branch that consumed it is going away.
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
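    // If a second terminator exists, it can only be the conditional JUMP of
    // a conditional+unconditional pair; erase it too and report two removed
    // branches.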
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  return AMDGPUInstrInfo::isPredicable(MI);
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(
                            SmallVectorImpl<MachineOperand> &Cond) const {
  // Invert the comparison opcode...
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  // ...and flip the predicate select register.
  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }

  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(
                          const SmallVectorImpl<MachineOperand> &Pred1,
                          const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

//===----------------------------------------------------------------------===//
// Instruction flag setters
//===----------------------------------------------------------------------===//

void R600InstrInfo::AddFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  MI->getOperand(Operand).addTargetFlag(Flag);
}
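// Illustrative (hypothetical call site, not from this file): a lowering pass
// could mark a target-defined bit on an instruction's operand with
//   TII->AddFlag(MI, /*Operand=*/1, 1 << 4);
// Flag values are target-specific; InsertBranch above sets bit 4 on operand 1
// of the PRED_X instruction to record that a JUMP consumes the predicate.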