//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    // Expand a vec4 copy into four per-channel 32-bit MOVs.
    for (unsigned i = 0; i < 4; i++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(i);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
        .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
        .addReg(RI.getSubReg(SrcReg, SubRegIndex))
        .addReg(0) // PREDICATE_BIT
        .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {
    // We can't copy vec4 registers with a single MOV, so neither side
    // should be a vec4 register here.
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
           !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addReg(0); // PREDICATE_BIT
  }
}

MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg,
                                            int64_t Imm) const {
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
  MachineInstrBuilder(MI).addImm(Imm);
  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
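// Placeholder opcodes never become ISA instructions; passes that run after
// instruction selection are expected to recognize and eliminate them.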
bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const {
  switch (opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::LAST:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

bool R600InstrInfo::isTexOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

bool R600InstrInfo::isCubeOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isFCOp(unsigned opcode) const {
  switch(opcode) {
  default: return false;
  case AMDGPU::BREAK_LOGICALZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
  case AMDGPU::BREAK_LOGICALZ_i32:
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
  case AMDGPU::IF_LOGICALZ_f32:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  case AMDGPU::ENDLOOP:
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::WHILELOOP:
    return true;
  }
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool isPredicateSetter(unsigned opcode) {
  switch (opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if (!isPredicated(LastInst)) {
        // Unconditional branch: the target is the only successor.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: recover the condition from the PRED_X
        // instruction that feeds the predicate.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
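  // With two terminators, the only pattern handled below is a predicated
  // JUMP followed by an unconditional JUMP: branch to TBB when the
  // predicate holds, otherwise branch to FBB.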
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a conditional JUMP followed by an unconditional
  // JUMP, handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  }
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      // Mark the predicate as used by a branch (bit 4 of the PRED_X
      // operand) and install the condition's comparison opcode.
      PredSet->getOperand(1).addTargetFlag(1<<4);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    PredSet->getOperand(1).addTargetFlag(1<<4);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      // Clear the "used by a branch" bit on the PRED_X operand, since the
      // branch that consumed it is going away.
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
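    // If a second terminator exists, it can only be the conditional JUMP of
    // a conditional+unconditional pair; erase it too and report two removed
    // branches.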
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  return AMDGPUInstrInfo::isPredicable(MI);
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(
                            SmallVectorImpl<MachineOperand> &Cond) const {
  // Invert the comparison opcode...
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  // ...and flip the predicate select register.
  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }

  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(
                          const SmallVectorImpl<MachineOperand> &Pred1,
                          const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

//===----------------------------------------------------------------------===//
// Instruction flag setters
//===----------------------------------------------------------------------===//

void R600InstrInfo::AddFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  MI->getOperand(Operand).addTargetFlag(Flag);
}
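// Illustrative (hypothetical call site, not from this file): a lowering pass
// could mark a target-defined bit on an instruction's operand with
//   TII->AddFlag(MI, /*Operand=*/1, 1 << 4);
// Flag values are target-specific; InsertBranch above sets bit 4 on operand 1
// of the PRED_X instruction to record that a JUMP consumes the predicate.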