diff options
author | Michal Sciubidlo <[email protected]> | 2012-09-12 08:57:01 +0200 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-09-19 13:17:41 -0400 |
commit | 0e0c21e00ee80bcff67e37ec86b97d6c25db066a (patch) | |
tree | b274df6453672bc6a6a6bbb836398a553d17a775 /src/gallium | |
parent | d525ed1a84fb889ddf380d967b3097fce298f8d4 (diff) |
radeon/llvm: Emit ISA for ALU instructions in the R600 code emitter
Signed-off-by: Tom Stellard <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 43 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_llvm.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 60 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r700_asm.c | 43 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDGPUSubtarget.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILBase.td | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp | 209 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/Processors.td | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600Instructions.td | 160 |
10 files changed, 359 insertions, 167 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 03ded6c5877..648e8b6ed5d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2874,3 +2874,46 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6 return 0; } + +void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) +{ + /* WORD0 */ + alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); + alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); + alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); + alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); + alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); + alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); + alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); + alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); + alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); + alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); + alu->last = G_SQ_ALU_WORD0_LAST(word0); + + /* WORD1 */ + alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); + alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); + alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); + alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); + alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); + if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ + { + alu->is_op3 = 1; + alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); + alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); + alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); + alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); + alu->inst = G_SQ_ALU_WORD1_OP3_ALU_INST(word1); + } + else /*ALU_DWORD1_OP2*/ + { + alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); + alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); + alu->inst = G_SQ_ALU_WORD1_OP2_ALU_INST(word1); + alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); + alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); + alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); + alu->execute_mask = + G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); + } +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 87e751adc78..403365ba07b 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -233,6 +233,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst); int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type); void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg); void r600_bytecode_dump(struct r600_bytecode *bc); +void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); int cm_bytecode_add_cf_end(struct r600_bytecode *bc); @@ -241,5 +242,6 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6 /* r700_asm.c */ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); +void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); #endif diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index e77758b3a77..776f47b6bff 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -259,6 +259,7 @@ const char * r600_llvm_gpu_string(enum radeon_family family) case CHIP_RV630: case CHIP_RV620: case CHIP_RV635: + gpu_family = "r600"; case CHIP_RS780: case CHIP_RS880: case CHIP_RV710: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 736165b8083..3e746e5e2e2 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -293,32 +293,37 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned bytes_read) { unsigned src_idx; - unsigned inst0, inst1; - unsigned push_modifier; struct r600_bytecode_alu alu; + unsigned src_const_reg[3]; + uint32_t word0, word1; + memset(&alu, 0, sizeof(alu)); for(src_idx = 0; src_idx < 3; src_idx++) { - bytes_read = r600_src_from_byte_stream(bytes, bytes_read, - &alu, src_idx); - } - - alu.dst.sel = bytes[bytes_read++]; - alu.dst.chan = bytes[bytes_read++]; - alu.dst.clamp = bytes[bytes_read++]; - alu.dst.write = bytes[bytes_read++]; - alu.dst.rel = bytes[bytes_read++]; - inst0 = bytes[bytes_read++]; - inst1 = bytes[bytes_read++]; - alu.inst = inst0 | (inst1 << 8); - alu.last = bytes[bytes_read++]; - alu.is_op3 = bytes[bytes_read++]; - push_modifier = bytes[bytes_read++]; - alu.pred_sel = bytes[bytes_read++]; - alu.bank_swizzle = bytes[bytes_read++]; - alu.bank_swizzle_force = bytes[bytes_read++]; - alu.omod = bytes[bytes_read++]; - alu.index_mode = bytes[bytes_read++]; + unsigned i; + src_const_reg[src_idx] = bytes[bytes_read++]; + for (i = 0; i < 4; i++) { + alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8); + } + } + word0 = i32_from_byte_stream(bytes, &bytes_read); + word1 = i32_from_byte_stream(bytes, &bytes_read); + + switch(ctx->bc->chip_class) { + case R600: + r600_bytecode_alu_read(&alu, word0, word1); + break; + case R700: + case EVERGREEN: + case CAYMAN: + r700_bytecode_alu_read(&alu, word0, word1); + break; + } + + for(src_idx = 0; src_idx < 3; src_idx++) { + if (src_const_reg[src_idx]) + alu.src[src_idx].sel += 512; + } if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) || alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) || @@ -329,15 +334,14 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; alu.last = 1; - } + } - if (push_modifier) { - alu.pred_sel = 0; - alu.execute_mask = 1; + if (alu.execute_mask) { + alu.pred_sel = 0; r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); - } else + } else { r600_bytecode_add_alu(ctx->bc, &alu); - + } /* XXX: Handle other KILL instructions */ if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) { diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index ea37c63525f..818933a4dbd 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -74,3 +74,46 @@ int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu * } return 0; } + +void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) +{ + /* WORD0 */ + alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); + alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); + alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); + alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); + alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); + alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); + alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); + alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); + alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); + alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); + alu->last = G_SQ_ALU_WORD0_LAST(word0); + + /* WORD1 */ + alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); + alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); + alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); + alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); + alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); + if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ + { + alu->is_op3 = 1; + alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); + alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); + alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); + alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); + alu->inst = G_SQ_ALU_WORD1_OP3_ALU_INST(word1); + } + else /*ALU_DWORD1_OP2*/ + { + alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); + alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); + alu->inst = G_SQ_ALU_WORD1_OP2_ALU_INST(word1); + alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); + alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); + alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); + alu->execute_mask = + G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); + } +} diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.h b/src/gallium/drivers/radeon/AMDGPUSubtarget.h index 86138617b82..30bda83a205 100644 --- a/src/gallium/drivers/radeon/AMDGPUSubtarget.h +++ b/src/gallium/drivers/radeon/AMDGPUSubtarget.h @@ -36,6 +36,7 @@ private: bool mIs64bit; bool mIs32on64bit; bool mDumpCode; + bool mR600ALUInst; InstrItineraryData InstrItins; @@ -56,6 +57,7 @@ public: std::string getDeviceName() const; virtual size_t getDefaultSize(uint32_t dim) const; bool dumpCode() const { return mDumpCode; } + bool r600ALUEncoding() const { return mR600ALUInst; } }; diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td index 7f72b4917f8..ffe9ce2c532 100644 --- a/src/gallium/drivers/radeon/AMDILBase.td +++ b/src/gallium/drivers/radeon/AMDILBase.td @@ -69,6 +69,11 @@ def FeatureDumpCode : SubtargetFeature <"DumpCode", "true", "Dump MachineInstrs in the CodeEmitter">; +def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", + "mR600ALUInst", + "false", + "Older version of ALU instructions encoding.">; + //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp index 34c4b396c77..dcf833876ce 100644 --- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp @@ -61,10 +61,9 @@ private: void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; + void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, + raw_ostream &OS) const; void EmitDst(const MCInst &MI, raw_ostream &OS) const; - void EmitALU(const MCInst &MI, unsigned numSrc, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; @@ -210,7 +209,18 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, } // Emit instruction type - EmitByte(0, OS); + EmitByte(INSTR_ALU, OS); + + uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); + + //older alu have different encoding for instructions with one or two src + //parameters. + if (STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst && + MI.getNumOperands() < 4) { + uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); + InstWord01 &= ~(0x3FFULL << 39); + InstWord01 |= ISAOpCode << 1; + } unsigned int OpIndex; for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) { @@ -218,17 +228,64 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) { break; } - EmitSrc(MI, OpIndex, OS); + EmitSrcISA(MI, OpIndex, InstWord01, OS); } // Emit zeros for unused sources for ( ; OpIndex < 4; OpIndex++) { - EmitNullBytes(SRC_BYTE_COUNT, OS); + EmitNullBytes(SRC_BYTE_COUNT - 6, OS); } - EmitDst(MI, OS); + // Emit destination register + const MCOperand &dstOp = MI.getOperand(0); + if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) { + //element of destination register + InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61; + + // isClamped + if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) { + InstWord01 |= 1ULL << 63; + } + + // write mask + if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) { + InstWord01 |= 1ULL << 36; + } + + // XXX: Emit relative addressing mode + } - EmitALU(MI, NumOperands - 1, Fixups, OS); + // Emit ALU + + // Emit IsLast (for this instruction group) (1 byte) + if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) { + InstWord01 |= 1ULL << 31; + } + + // XXX: Emit push modifier + if(isFlagSet(MI, 1, MO_FLAG_PUSH)) { + InstWord01 |= 1ULL << 34; + } + + // XXX: Emit predicate (1 byte) + int PredIdx = MCDesc.findFirstPredOperandIdx(); + if (PredIdx != -1) { + switch(MI.getOperand(PredIdx).getReg()) { + case AMDGPU::PRED_SEL_ZERO: + InstWord01 |= 2ULL << 29; + break; + case AMDGPU::PRED_SEL_ONE: + InstWord01 |= 3ULL << 29; + break; + } + } + + //XXX: predicate + //XXX: bank swizzle + //XXX: OMOD + //XXX: index mode + + Emit(InstWord01, OS); } void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, @@ -295,99 +352,74 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, } -void R600MCCodeEmitter::EmitDst(const MCInst &MI, raw_ostream &OS) const { - - const MCOperand &MO = MI.getOperand(0); - if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) { - // Emit the destination register index (1 byte) - EmitByte(getHWReg(MO.getReg()), OS); - - // Emit the element of the destination register (1 byte) - EmitByte(getHWRegChan(MO.getReg()), OS); - - // Emit isClamped (1 byte) - if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) { +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, + uint64_t &Value, raw_ostream &OS) const { + const MCOperand &MO = MI.getOperand(OpIdx); + union { + float f; + uint32_t i; + } InlineConstant; + InlineConstant.i = 0; + // Emit the source select (2 bytes). For GPRs, this is the register index. + // For other potential instruction operands, (e.g. constant registers) the + // value of the source select is defined in the r600isa docs. + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { EmitByte(1, OS); } else { EmitByte(0, OS); } - // Emit writemask (1 byte). - if (isFlagSet(MI, 0, MO_FLAG_MASK)) { - EmitByte(0, OS); - } else { - EmitByte(1, OS); + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + InlineConstant.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + InlineConstant.i = ImmOp.getImm(); + } } - - // XXX: Emit relative addressing mode - EmitByte(0, OS); - } else { - // XXX: Handle other operand types. Are there any for destination regs? - EmitNullBytes(DST_BYTE_COUNT, OS); - } -} - -void R600MCCodeEmitter::EmitALU(const MCInst &MI, unsigned numSrc, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - - // Emit the instruction (2 bytes) - EmitTwoBytes(getBinaryCodeForInstr(MI, Fixups), OS); - - // Emit IsLast (for this instruction group) (1 byte) - if (isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) { - EmitByte(0, OS); } else { - EmitByte(1, OS); + // XXX: Handle other operand types. + EmitTwoBytes(0, OS); } - // Emit isOp3 (1 byte) - if (numSrc == 3) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } + // source channel + uint64_t sourceChannelValue = getHWRegChan(MO.getReg()); + if (OpIdx == 1) + Value |= sourceChannelValue << 10; + if (OpIdx == 2) + Value |= sourceChannelValue << 23; + if (OpIdx == 3) + Value |= sourceChannelValue << 42; - // XXX: Emit push modifier - if(isFlagSet(MI, 1, MO_FLAG_PUSH)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); + // isNegated + if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) + && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || + (MO.isReg() && + (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ + if (OpIdx == 1) + Value |= 1ULL << 12; + else if (OpIdx == 2) + Value |= 1ULL << 25; + else if (OpIdx == 3) + Value |= 1ULL << 44; } - // XXX: Emit predicate (1 byte) - int PredIdx = MCDesc.findFirstPredOperandIdx(); - if (PredIdx > -1) - switch(MI.getOperand(PredIdx).getReg()) { - case AMDGPU::PRED_SEL_ZERO: - EmitByte(2, OS); - break; - case AMDGPU::PRED_SEL_ONE: - EmitByte(3, OS); - break; - default: - EmitByte(0, OS); - break; - } - else { - EmitByte(0, OS); + // isAbsolute + if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { + assert(OpIdx < 3); + Value |= 1ULL << (32+OpIdx-1); } + // XXX: relative addressing mode + // XXX: kc_bank - // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like - // r600_asm.c sets it. - EmitByte(0, OS); - - // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. - EmitByte(0, OS); - - // XXX: Emit OMOD (1 byte) Not implemented. - EmitByte(0, OS); + // Emit the literal value, if applicable (4 bytes). + Emit(InlineConstant.i, OS); - // XXX: Emit index_mode. I think this is for indirect addressing, so we - // don't need to worry about it. - EmitByte(0, OS); } void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, @@ -621,9 +653,12 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixup) const { if (MO.isReg()) { - return getHWReg(MO.getReg()); - } else { + return getHWRegIndex(MO.getReg()); + } else if (MO.isImm()) { return MO.getImm(); + } else { + assert(0); + return 0; } } diff --git a/src/gallium/drivers/radeon/Processors.td b/src/gallium/drivers/radeon/Processors.td index 92f46367090..3469f828fc0 100644 --- a/src/gallium/drivers/radeon/Processors.td +++ b/src/gallium/drivers/radeon/Processors.td @@ -13,6 +13,7 @@ class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> : Processor<Name, itin, Features>; +def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; def : Proc<"rv710", R600_EG_Itin, []>; def : Proc<"rv730", R600_EG_Itin, []>; def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index e2f8d338cc3..c9c9f611ed6 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -13,17 +13,18 @@ include "R600Intrinsics.td" -class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern, +class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin> : AMDGPUInst <outs, ins, asm, pattern> { - field bits<32> Inst; + field bits<64> Inst; bit Trig = 0; bit Op3 = 0; bit isVector = 0; bits<2> FlagOperandIdx = 0; - let Inst = inst; + bits<11> op_code = inst; + //let Inst = inst; let Namespace = "AMDGPU"; let OutOperandList = outs; let InOperandList = ins; @@ -75,27 +76,39 @@ def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), (ops PRED_SEL_OFF)>; -class R600_1OP <bits<32> inst, string opName, list<dag> pattern, +class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg32:$dst), (ins R600_Reg32:$src, R600_Pred:$p, variable_ops), !strconcat(opName, " $dst, $src ($p)"), pattern, - itin - >; + itin>{ + bits<7> dst; + bits<9> src; + let Inst{8-0} = src; + let Inst{49-39} = inst; + let Inst{59-53} = dst; + } -class R600_2OP <bits<32> inst, string opName, list<dag> pattern, +class R600_2OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg32:$dst), (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops), !strconcat(opName, " $dst, $src0, $src1"), pattern, - itin - >; + itin>{ + bits<7> dst; + bits<9> src0; + bits<9> src1; + let Inst{8-0} = src0; + let Inst{21-13} = src1; + let Inst{49-39} = inst; + let Inst{59-53} = dst; + } -class R600_3OP <bits<32> inst, string opName, list<dag> pattern, +class R600_3OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg32:$dst), @@ -103,7 +116,15 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern, !strconcat(opName, " $dst, $src0, $src1, $src2"), pattern, itin>{ - + bits<7> dst; + bits<9> src0; + bits<9> src1; + bits<9> src2; + let Inst{8-0} = src0; + let Inst{21-13} = src1; + let Inst{40-32} = src2; + let Inst{49-45} = inst{4-0}; + let Inst{59-53} = dst; let Op3 = 1; } @@ -114,11 +135,12 @@ def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst), "PRED $dst, $src0, $src1", [], NullALU> { - let DisableEncoding = "$src0"; - field bits<32> Inst; - bits<32> src1; - - let Inst = src1; + bits<7> dst; + bits<9> src0; + bits<11> src1; + let Inst{8-0} = src0; + let Inst{49-39} = src1; + let Inst{59-53} = dst; let FlagOperandIdx = 3; } @@ -131,26 +153,29 @@ def JUMP : InstR600 <0x10, >; } -class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern, +class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, InstrItinClass itin = VecALU> : InstR600 <inst, (outs R600_Reg32:$dst), ins, asm, pattern, - itin - - >; + itin>{ + bits<7> dst; + let Inst{49-39} = inst; + let Inst{59-53} = dst; + } -class R600_TEX <bits<32> inst, string opName, list<dag> pattern, +class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2), !strconcat(opName, "$dst, $src0, $src1, $src2"), pattern, - itin - >; + itin>{ + let Inst {10-0} = inst; + } def TEX_SHADOW : PatLeaf< (imm), @@ -328,6 +353,11 @@ def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), R600_Pred:$p), "MOV $dst, $src0", [], AnyALU> { let FlagOperandIdx = 2; + bits<7> dst; + bits<9> src0; + let Inst{8-0} = src0; + let Inst{49-39} = op_code; + let Inst{59-53} = dst; } class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, @@ -335,7 +365,15 @@ class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm), "MOV_IMM $dst, $imm", [], AnyALU ->; +>{ + bits<7> dst; + bits<9> alu_literal; + bits<9> p; + let Inst{8-0} = alu_literal; + let Inst{21-13} = p; + let Inst{49-39} = op_code; + let Inst{59-53} = dst; +} def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; def : Pat < @@ -357,6 +395,13 @@ def KILLGT : InstR600 <0x2D, [], NullALU>{ let FlagOperandIdx = 3; + bits<7> dst; + bits<9> src0; + bits<9> src1; + let Inst{8-0} = src0; + let Inst{21-13} = src1; + let Inst{49-39} = op_code; + let Inst{59-53} = dst; } def AND_INT : R600_2OP < @@ -530,39 +575,43 @@ def TEX_SAMPLE_C_G : R600_TEX < // Helper classes for common instructions //===----------------------------------------------------------------------===// -class MUL_LIT_Common <bits<32> inst> : R600_3OP < +class MUL_LIT_Common <bits<11> inst> : R600_3OP < inst, "MUL_LIT", [] >; -class MULADD_Common <bits<32> inst> : R600_3OP < +class MULADD_Common <bits<11> inst> : R600_3OP < inst, "MULADD", [(set (f32 R600_Reg32:$dst), (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] >; -class CNDE_Common <bits<32> inst> : R600_3OP < +class CNDE_Common <bits<11> inst> : R600_3OP < inst, "CNDE", [(set (f32 R600_Reg32:$dst), (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))] >; -class CNDGT_Common <bits<32> inst> : R600_3OP < +class CNDGT_Common <bits<11> inst> : R600_3OP < inst, "CNDGT", [] >; -class CNDGE_Common <bits<32> inst> : R600_3OP < +class CNDGE_Common <bits<11> inst> : R600_3OP < inst, "CNDGE", [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] >; -class DOT4_Common <bits<32> inst> : R600_REDUCTION < +class DOT4_Common <bits<11> inst> : R600_REDUCTION < inst, (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), "DOT4 $dst $src0, $src1", [] > { + bits<9> src0; + bits<9> src1; + let Inst{8-0} = src0; + let Inst{21-13} = src1; let FlagOperandIdx = 3; } @@ -571,7 +620,7 @@ class DOT4_Pat <Instruction dot4> : Pat < (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) >; -multiclass CUBE_Common <bits<32> inst> { +multiclass CUBE_Common <bits<11> inst> { def _pseudo : InstR600 < inst, @@ -590,110 +639,117 @@ multiclass CUBE_Common <bits<32> inst> { [], VecALU >{ let FlagOperandIdx = 3; + bits<7> dst; + bits<9> src0; + bits<9> src1; + let Inst{8-0} = src0; + let Inst{21-13} = src1; + let Inst{49-39} = inst; + let Inst{59-53} = dst; } } -class EXP_IEEE_Common <bits<32> inst> : R600_1OP < +class EXP_IEEE_Common <bits<11> inst> : R600_1OP < inst, "EXP_IEEE", [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] >; -class FLT_TO_INT_Common <bits<32> inst> : R600_1OP < +class FLT_TO_INT_Common <bits<11> inst> : R600_1OP < inst, "FLT_TO_INT", [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] >; -class INT_TO_FLT_Common <bits<32> inst> : R600_1OP < +class INT_TO_FLT_Common <bits<11> inst> : R600_1OP < inst, "INT_TO_FLT", [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] >; -class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP < +class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP < inst, "FLT_TO_UINT", [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] >; -class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP < +class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP < inst, "UINT_TO_FLT", [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] >; -class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP < +class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "LOG_CLAMPED", [] >; -class LOG_IEEE_Common <bits<32> inst> : R600_1OP < +class LOG_IEEE_Common <bits<11> inst> : R600_1OP < inst, "LOG_IEEE", [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))] >; -class LSHL_Common <bits<32> inst> : R600_2OP < +class LSHL_Common <bits<11> inst> : R600_2OP < inst, "LSHL $dst, $src0, $src1", [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))] >; -class LSHR_Common <bits<32> inst> : R600_2OP < +class LSHR_Common <bits<11> inst> : R600_2OP < inst, "LSHR $dst, $src0, $src1", [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))] >; -class ASHR_Common <bits<32> inst> : R600_2OP < +class ASHR_Common <bits<11> inst> : R600_2OP < inst, "ASHR $dst, $src0, $src1", [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULHI_INT_Common <bits<32> inst> : R600_2OP < +class MULHI_INT_Common <bits<11> inst> : R600_2OP < inst, "MULHI_INT $dst, $src0, $src1", [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULHI_UINT_Common <bits<32> inst> : R600_2OP < +class MULHI_UINT_Common <bits<11> inst> : R600_2OP < inst, "MULHI $dst, $src0, $src1", [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULLO_INT_Common <bits<32> inst> : R600_2OP < +class MULLO_INT_Common <bits<11> inst> : R600_2OP < inst, "MULLO_INT $dst, $src0, $src1", [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULLO_UINT_Common <bits<32> inst> : R600_2OP < +class MULLO_UINT_Common <bits<11> inst> : R600_2OP < inst, "MULLO_UINT $dst, $src0, $src1", [] >; -class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP < +class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIP_CLAMPED", [] >; -class RECIP_IEEE_Common <bits<32> inst> : R600_1OP < +class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] >; -class RECIP_UINT_Common <bits<32> inst> : R600_1OP < +class RECIP_UINT_Common <bits<11> inst> : R600_1OP < inst, "RECIP_INT $dst, $src", [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] >; -class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP < +class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_CLAMPED", [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] >; -class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP < +class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] >; -class SIN_Common <bits<32> inst> : R600_1OP < +class SIN_Common <bits<11> inst> : R600_1OP < inst, "SIN", []>{ let Trig = 1; } -class COS_Common <bits<32> inst> : R600_1OP < +class COS_Common <bits<11> inst> : R600_1OP < inst, "COS", []> { let Trig = 1; } |