//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This code emitters outputs bytecode that is understood by the r600g driver // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, // except that the size of the instruction fields are rounded up to the // nearest byte. // // [1] http://www.mesa3d.org/ // //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUCodeEmitter.h" #include "AMDGPUInstrInfo.h" #include "AMDILUtilityFunctions.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetMachine.h" #include #define SRC_BYTE_COUNT 11 #define DST_BYTE_COUNT 5 using namespace llvm; namespace { class R600CodeEmitter : public MachineFunctionPass, public AMDGPUCodeEmitter { private: static char ID; formatted_raw_ostream &_OS; const TargetMachine * TM; const MachineRegisterInfo * MRI; const R600RegisterInfo * TRI; const R600InstrInfo * TII; bool IsCube; bool IsReduction; bool IsVector; unsigned currentElement; bool IsLast; unsigned section_start; public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), _OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false), IsLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } bool runOnMachineFunction(MachineFunction &MF); virtual uint64_t getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const; private: void EmitALUInstr(MachineInstr &MI); void EmitSrc(const MachineOperand & MO, int chan_override = -1); void EmitDst(const MachineOperand & MO); void EmitALU(MachineInstr &MI, unsigned numSrc); void EmitTexInstr(MachineInstr &MI); void EmitFCInstr(MachineInstr &MI); void EmitNullBytes(unsigned int byteCount); void EmitByte(unsigned int byte); void EmitTwoBytes(uint32_t bytes); void Emit(uint32_t value); void Emit(uint64_t value); unsigned getHWReg(unsigned regNo) const; }; } // End anonymous namespace enum RegElement { ELEMENT_X = 0, ELEMENT_Y, ELEMENT_Z, ELEMENT_W }; enum InstrTypes { INSTR_ALU = 0, INSTR_TEX, INSTR_FC, INSTR_NATIVE, INSTR_VTX }; enum FCInstr { FC_IF = 0, FC_IF_INT, FC_ELSE, FC_ENDIF, FC_BGNLOOP, FC_ENDLOOP, FC_BREAK, FC_BREAK_NZ_INT, FC_CONTINUE, FC_BREAK_Z_INT, FC_BREAK_NZ }; enum TextureTypes { TEXTURE_1D = 1, TEXTURE_2D, TEXTURE_3D, TEXTURE_CUBE, TEXTURE_RECT, TEXTURE_SHADOW1D, TEXTURE_SHADOW2D, TEXTURE_SHADOWRECT, TEXTURE_1D_ARRAY, TEXTURE_2D_ARRAY, TEXTURE_SHADOW1D_ARRAY, TEXTURE_SHADOW2D_ARRAY }; char R600CodeEmitter::ID = 0; FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) { return new R600CodeEmitter(OS); } bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { TM = &MF.getTarget(); MRI = &MF.getRegInfo(); TRI = static_cast(TM->getRegisterInfo()); TII = static_cast(TM->getInstrInfo()); const AMDGPUSubtarget &STM = TM->getSubtarget(); std::string gpu = STM.getDeviceName(); if (STM.dumpCode()) { MF.dump(); } for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; IsReduction = TII->isReductionOp(MI.getOpcode()); IsVector = TII->isVector(MI); IsCube = TII->isCubeOp(MI.getOpcode()); if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { continue; } if (TII->isTexOp(MI.getOpcode())) { EmitTexInstr(MI); } else if (TII->isFCOp(MI.getOpcode())){ EmitFCInstr(MI); } else if (IsReduction || IsVector || IsCube) { IsLast = false; // XXX: On Cayman, some (all?) of the vector instructions only need // to fill the first three slots. for (currentElement = 0; currentElement < 4; currentElement++) { IsLast = (currentElement == 3); EmitALUInstr(MI); } IsReduction = false; IsVector = false; IsCube = false; } else if (MI.getOpcode() == AMDGPU::RETURN || MI.getOpcode() == AMDGPU::BUNDLE || MI.getOpcode() == AMDGPU::KILL) { continue; } else { switch(MI.getOpcode()) { case AMDGPU::RAT_WRITE_CACHELESS_eg: { uint64_t inst = getBinaryCodeForInstr(MI); // Set End Of Program bit // XXX: Need better check of end of program. EOP should be // encoded in one of the operands of the MI, and it should be // set in a prior pass. MachineBasicBlock::iterator NextI = llvm::next(I); MachineInstr &NextMI = *NextI; if (NextMI.getOpcode() == AMDGPU::RETURN) { inst |= (((uint64_t)1) << 53); } EmitByte(INSTR_NATIVE); Emit(inst); break; } case AMDGPU::VTX_READ_PARAM_i32_eg: case AMDGPU::VTX_READ_PARAM_f32_eg: case AMDGPU::VTX_READ_GLOBAL_i32_eg: case AMDGPU::VTX_READ_GLOBAL_f32_eg: case AMDGPU::VTX_READ_GLOBAL_v4i32_eg: case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: { uint64_t InstWord01 = getBinaryCodeForInstr(MI); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset EmitByte(INSTR_VTX); Emit(InstWord01); Emit(InstWord2); break; } default: EmitALUInstr(MI); break; } } } } return false; } void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) { unsigned numOperands = MI.getNumExplicitOperands(); if(MI.findFirstPredOperandIdx() > -1) numOperands--; // Some instructions are just place holder instructions that represent // operations that the GPU does automatically. They should be ignored. if (TII->isPlaceHolderOpcode(MI.getOpcode())) { return; } if(MI.getOpcode() == AMDGPU::PRED_X) numOperands = 2; // XXX Check if instruction writes a result if (numOperands < 1) { return; } const MachineOperand dstOp = MI.getOperand(0); // Emit instruction type EmitByte(0); if (IsCube) { static const int cube_src_swz[] = {2, 2, 0, 1}; EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]); EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); EmitNullBytes(SRC_BYTE_COUNT); } else { unsigned int opIndex; for (opIndex = 1; opIndex < numOperands; opIndex++) { // Literal constants are always stored as the last operand. if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { break; } EmitSrc(MI.getOperand(opIndex)); } // Emit zeros for unused sources for ( ; opIndex < 4; opIndex++) { EmitNullBytes(SRC_BYTE_COUNT); } } EmitDst(dstOp); EmitALU(MI, numOperands - 1); } void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) { uint32_t value = 0; // Emit the source select (2 bytes). For GPRs, this is the register index. // For other potential instruction operands, (e.g. constant registers) the // value of the source select is defined in the r600isa docs. if (MO.isReg()) { unsigned reg = MO.getReg(); EmitTwoBytes(getHWReg(reg)); if (reg == AMDGPU::ALU_LITERAL_X) { const MachineInstr * parent = MO.getParent(); unsigned immOpIndex = parent->getNumExplicitOperands() - 1; MachineOperand immOp = parent->getOperand(immOpIndex); if (immOp.isFPImm()) { value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue(); } else { assert(immOp.isImm()); value = immOp.getImm(); } } } else { // XXX: Handle other operand types. EmitTwoBytes(0); } // Emit the source channel (1 byte) if (chan_override != -1) { EmitByte(chan_override); } else if (IsReduction) { EmitByte(currentElement); } else if (MO.isReg()) { EmitByte(TRI->getHWRegChan(MO.getReg())); } else { EmitByte(0); } // XXX: Emit isNegated (1 byte) if ((!(MO.getTargetFlags() & MO_FLAG_ABS)) && (MO.getTargetFlags() & MO_FLAG_NEG || (MO.isReg() && (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ EmitByte(1); } else { EmitByte(0); } // Emit isAbsolute (1 byte) if (MO.getTargetFlags() & MO_FLAG_ABS) { EmitByte(1); } else { EmitByte(0); } // XXX: Emit relative addressing mode (1 byte) EmitByte(0); // Emit kc_bank, This will be adjusted later by r600_asm EmitByte(0); // Emit the literal value, if applicable (4 bytes). Emit(value); } void R600CodeEmitter::EmitDst(const MachineOperand & MO) { if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) { // Emit the destination register index (1 byte) EmitByte(getHWReg(MO.getReg())); // Emit the element of the destination register (1 byte) if (IsReduction || IsCube || IsVector) { EmitByte(currentElement); } else { EmitByte(TRI->getHWRegChan(MO.getReg())); } // Emit isClamped (1 byte) if (MO.getTargetFlags() & MO_FLAG_CLAMP) { EmitByte(1); } else { EmitByte(0); } // Emit writemask (1 byte). if (((IsReduction || IsVector) && currentElement != TRI->getHWRegChan(MO.getReg())) || MO.getTargetFlags() & MO_FLAG_MASK) { EmitByte(0); } else { EmitByte(1); } // XXX: Emit relative addressing mode EmitByte(0); } else { // XXX: Handle other operand types. Are there any for destination regs? EmitNullBytes(DST_BYTE_COUNT); } } void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc) { // Emit the instruction (2 bytes) EmitTwoBytes(getBinaryCodeForInstr(MI)); // Emit IsLast (for this instruction group) (1 byte) if (IsLast) { EmitByte(1); } else { EmitByte(0); } // Emit isOp3 (1 byte) if (numSrc == 3) { EmitByte(1); } else { EmitByte(0); } // XXX: Emit push modifier if(MI.getOperand(1).getTargetFlags() & MO_FLAG_PUSH) { EmitByte(1); } else { EmitByte(0); } // XXX: Emit predicate (1 byte) int predidx = MI.findFirstPredOperandIdx(); if (predidx > -1) switch(MI.getOperand(predidx).getReg()) { case AMDGPU::PRED_SEL_ZERO: EmitByte(2); break; case AMDGPU::PRED_SEL_ONE: EmitByte(3); break; default: EmitByte(0); break; } else { EmitByte(0); } // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like // r600_asm.c sets it. EmitByte(0); // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. EmitByte(0); // XXX: Emit OMOD (1 byte) Not implemented. EmitByte(0); // XXX: Emit index_mode. I think this is for indirect addressing, so we // don't need to worry about it. EmitByte(0); } void R600CodeEmitter::EmitTexInstr(MachineInstr &MI) { unsigned opcode = MI.getOpcode(); bool hasOffsets = (opcode == AMDGPU::TEX_LD); unsigned op_offset = hasOffsets ? 3 : 0; int64_t sampler = MI.getOperand(op_offset+2).getImm(); int64_t textureType = MI.getOperand(op_offset+3).getImm(); unsigned srcSelect[4] = {0, 1, 2, 3}; // Emit instruction type EmitByte(1); // Emit instruction EmitByte(getBinaryCodeForInstr(MI)); // XXX: Emit resource id r600_shader.c uses sampler + 1. Why? EmitByte(sampler + 1 + 1); // Emit source register EmitByte(getHWReg(MI.getOperand(1).getReg())); // XXX: Emit src isRelativeAddress EmitByte(0); // Emit destination register EmitByte(getHWReg(MI.getOperand(0).getReg())); // XXX: Emit dst isRealtiveAddress EmitByte(0); // XXX: Emit dst select EmitByte(0); // X EmitByte(1); // Y EmitByte(2); // Z EmitByte(3); // W // XXX: Emit lod bias EmitByte(0); // XXX: Emit coord types unsigned coordType[4] = {1, 1, 1, 1}; if (textureType == TEXTURE_RECT || textureType == TEXTURE_SHADOWRECT) { coordType[ELEMENT_X] = 0; coordType[ELEMENT_Y] = 0; } if (textureType == TEXTURE_1D_ARRAY || textureType == TEXTURE_SHADOW1D_ARRAY) { if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) { coordType[ELEMENT_Y] = 0; } else { coordType[ELEMENT_Z] = 0; srcSelect[ELEMENT_Z] = ELEMENT_Y; } } else if (textureType == TEXTURE_2D_ARRAY || textureType == TEXTURE_SHADOW2D_ARRAY) { coordType[ELEMENT_Z] = 0; } for (unsigned i = 0; i < 4; i++) { EmitByte(coordType[i]); } // XXX: Emit offsets if (hasOffsets) for (unsigned i = 2; i < 5; i++) EmitByte(MI.getOperand(i).getImm()<<1); else EmitNullBytes(3); // Emit sampler id EmitByte(sampler); // XXX:Emit source select if ((textureType == TEXTURE_SHADOW1D || textureType == TEXTURE_SHADOW2D || textureType == TEXTURE_SHADOWRECT || textureType == TEXTURE_SHADOW1D_ARRAY) && opcode != AMDGPU::TEX_SAMPLE_C_L && opcode != AMDGPU::TEX_SAMPLE_C_LB) { srcSelect[ELEMENT_W] = ELEMENT_Z; } for (unsigned i = 0; i < 4; i++) { EmitByte(srcSelect[i]); } } void R600CodeEmitter::EmitFCInstr(MachineInstr &MI) { // Emit instruction type EmitByte(INSTR_FC); // Emit SRC unsigned numOperands = MI.getNumOperands(); if (numOperands > 0) { assert(numOperands == 1); EmitSrc(MI.getOperand(0)); } else { EmitNullBytes(SRC_BYTE_COUNT); } // Emit FC Instruction enum FCInstr instr; switch (MI.getOpcode()) { case AMDGPU::BREAK_LOGICALZ_f32: instr = FC_BREAK; break; case AMDGPU::BREAK_LOGICALNZ_f32: instr = FC_BREAK_NZ; break; case AMDGPU::BREAK_LOGICALNZ_i32: instr = FC_BREAK_NZ_INT; break; case AMDGPU::BREAK_LOGICALZ_i32: instr = FC_BREAK_Z_INT; break; case AMDGPU::CONTINUE_LOGICALNZ_f32: case AMDGPU::CONTINUE_LOGICALNZ_i32: instr = FC_CONTINUE; break; case AMDGPU::IF_LOGICALNZ_f32: instr = FC_IF; case AMDGPU::IF_LOGICALNZ_i32: instr = FC_IF_INT; break; case AMDGPU::IF_LOGICALZ_f32: abort(); break; case AMDGPU::ELSE: instr = FC_ELSE; break; case AMDGPU::ENDIF: instr = FC_ENDIF; break; case AMDGPU::ENDLOOP: instr = FC_ENDLOOP; break; case AMDGPU::WHILELOOP: instr = FC_BGNLOOP; break; default: abort(); break; } EmitByte(instr); } void R600CodeEmitter::EmitNullBytes(unsigned int byteCount) { for (unsigned int i = 0; i < byteCount; i++) { EmitByte(0); } } void R600CodeEmitter::EmitByte(unsigned int byte) { _OS.write((uint8_t) byte & 0xff); } void R600CodeEmitter::EmitTwoBytes(unsigned int bytes) { _OS.write((uint8_t) (bytes & 0xff)); _OS.write((uint8_t) ((bytes >> 8) & 0xff)); } void R600CodeEmitter::Emit(uint32_t value) { for (unsigned i = 0; i < 4; i++) { _OS.write((uint8_t) ((value >> (8 * i)) & 0xff)); } } void R600CodeEmitter::Emit(uint64_t value) { for (unsigned i = 0; i < 8; i++) { EmitByte((value >> (8 * i)) & 0xff); } } unsigned R600CodeEmitter::getHWReg(unsigned regNo) const { unsigned HWReg; HWReg = TRI->getHWRegIndex(regNo); if (AMDGPU::R600_CReg32RegClass.contains(regNo)) { HWReg += 512; } return HWReg; } uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { if (MO.isReg()) { return getHWReg(MO.getReg()); } else { return MO.getImm(); } } #include "AMDGPUGenCodeEmitter.inc"