/* * Copyright 2011 Christoph Bumiller * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "nv50_ir_target_nvc0.h" namespace nv50_ir { // Argh, all these assertions ... 
class CodeEmitterNVC0 : public CodeEmitter { public: CodeEmitterNVC0(const TargetNVC0 *); virtual bool emitInstruction(Instruction *); virtual uint32_t getMinEncodingSize(const Instruction *) const; inline void setProgramType(Program::Type pType) { progType = pType; } private: const TargetNVC0 *targ; Program::Type progType; private: void emitForm_A(const Instruction *, uint64_t); void emitForm_B(const Instruction *, uint64_t); void emitForm_S(const Instruction *, uint32_t, bool pred); void emitPredicate(const Instruction *); void setAddress16(const ValueRef&); void setImmediate(const Instruction *, const int s); // needs op already set void setImmediateS8(const ValueRef&); void emitCondCode(CondCode cc, int pos); void emitInterpMode(const Instruction *); void emitLoadStoreType(DataType ty); void emitCachingMode(CacheMode c); void emitShortSrc2(const ValueRef&); inline uint8_t getSRegEncoding(const ValueRef&); void roundMode_A(const Instruction *); void roundMode_C(const Instruction *); void roundMode_CS(const Instruction *); void emitNegAbs12(const Instruction *); void emitNOP(const Instruction *); void emitLOAD(const Instruction *); void emitSTORE(const Instruction *); void emitMOV(const Instruction *); void emitINTERP(const Instruction *); void emitPFETCH(const Instruction *); void emitVFETCH(const Instruction *); void emitEXPORT(const Instruction *); void emitOUT(const Instruction *); void emitUADD(const Instruction *); void emitFADD(const Instruction *); void emitUMUL(const Instruction *); void emitFMUL(const Instruction *); void emitIMAD(const Instruction *); void emitFMAD(const Instruction *); void emitNOT(Instruction *); void emitLogicOp(const Instruction *, uint8_t subOp); void emitPOPC(const Instruction *); void emitINSBF(const Instruction *); void emitShift(const Instruction *); void emitSFnOp(const Instruction *, uint8_t subOp); void emitCVT(Instruction *); void emitMINMAX(const Instruction *); void emitPreOp(const Instruction *); void emitSET(const 
CmpInstruction *); void emitSLCT(const CmpInstruction *); void emitSELP(const Instruction *); void emitTEX(const TexInstruction *); void emitTEXCSAA(const TexInstruction *); void emitTXQ(const TexInstruction *); void emitPIXLD(const TexInstruction *); void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); void emitFlow(const Instruction *); inline void defId(const ValueDef&, const int pos); inline void srcId(const ValueRef&, const int pos); inline void srcId(const ValueRef *, const int pos); inline void srcId(const Instruction *, int s, const int pos); inline void srcAddr32(const ValueRef&, const int pos); // address / 4 inline bool isLIMM(const ValueRef&, DataType ty); }; // for better visibility #define HEX64(h, l) 0x##h##l##ULL #define SDATA(a) ((a).rep()->reg.data) #define DDATA(a) ((a).rep()->reg.data) void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos) { code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32); } void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos) { code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32); } void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos) { int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63; code[pos / 32] |= r << (pos % 32); } void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos) { code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32); } void CodeEmitterNVC0::defId(const ValueDef& def, const int pos) { code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32); } bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty) { const ImmediateValue *imm = ref.get()->asImm(); return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 
0xfff : 0xfff00000)); } void CodeEmitterNVC0::roundMode_A(const Instruction *insn) { switch (insn->rnd) { case ROUND_M: code[1] |= 1 << 23; break; case ROUND_P: code[1] |= 2 << 23; break; case ROUND_Z: code[1] |= 3 << 23; break; default: assert(insn->rnd == ROUND_N); break; } } void CodeEmitterNVC0::emitNegAbs12(const Instruction *i) { if (i->src(1).mod.abs()) code[0] |= 1 << 6; if (i->src(0).mod.abs()) code[0] |= 1 << 7; if (i->src(1).mod.neg()) code[0] |= 1 << 8; if (i->src(0).mod.neg()) code[0] |= 1 << 9; } void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos) { uint8_t val; switch (cc) { case CC_LT: val = 0x1; break; case CC_LTU: val = 0x9; break; case CC_EQ: val = 0x2; break; case CC_EQU: val = 0xa; break; case CC_LE: val = 0x3; break; case CC_LEU: val = 0xb; break; case CC_GT: val = 0x4; break; case CC_GTU: val = 0xc; break; case CC_NE: val = 0x5; break; case CC_NEU: val = 0xd; break; case CC_GE: val = 0x6; break; case CC_GEU: val = 0xe; break; case CC_TR: val = 0xf; break; case CC_FL: val = 0x0; break; case CC_A: val = 0x14; break; case CC_NA: val = 0x13; break; case CC_S: val = 0x15; break; case CC_NS: val = 0x12; break; case CC_C: val = 0x16; break; case CC_NC: val = 0x11; break; case CC_O: val = 0x17; break; case CC_NO: val = 0x10; break; default: val = 0; assert(!"invalid condition code"); break; } code[pos / 32] |= val << (pos % 32); } void CodeEmitterNVC0::emitPredicate(const Instruction *i) { if (i->predSrc >= 0) { assert(i->getPredicate()->reg.file == FILE_PREDICATE); srcId(i->src(i->predSrc), 10); if (i->cc == CC_NOT_P) code[0] |= 0x2000; // negate } else { code[0] |= 0x1c00; } } void CodeEmitterNVC0::setAddress16(const ValueRef& src) { Symbol *sym = src.get()->asSym(); assert(sym); code[0] |= (sym->reg.data.offset & 0x003f) << 26; code[1] |= (sym->reg.data.offset & 0xffc0) >> 6; } void CodeEmitterNVC0::setImmediate(const Instruction *i, const int s) { const ImmediateValue *imm = i->src(s).get()->asImm(); uint32_t u32; assert(imm); u32 = 
imm->reg.data.u32; if ((code[0] & 0xf) == 0x2) { // LIMM code[0] |= (u32 & 0x3f) << 26; code[1] |= u32 >> 6; } else if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) { // integer immediate assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); assert(!(code[1] & 0xc000)); u32 &= 0xfffff; code[0] |= (u32 & 0x3f) << 26; code[1] |= 0xc000 | (u32 >> 6); } else { // float immediate assert(!(u32 & 0x00000fff)); assert(!(code[1] & 0xc000)); code[0] |= ((u32 >> 12) & 0x3f) << 26; code[1] |= 0xc000 | (u32 >> 18); } } void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref) { const ImmediateValue *imm = ref.get()->asImm(); int8_t s8 = static_cast(imm->reg.data.s32); assert(s8 == imm->reg.data.s32); code[0] |= (s8 & 0x3f) << 26; code[0] |= (s8 >> 6) << 8; } void CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) { code[0] = opc; code[1] = opc >> 32; emitPredicate(i); defId(i->def(0), 14); int s1 = 26; if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST) s1 = 49; for (int s = 0; s < 3 && i->srcExists(s); ++s) { switch (i->getSrc(s)->reg.file) { case FILE_MEMORY_CONST: assert(!(code[1] & 0xc000)); code[1] |= (s == 2) ? 0x8000 : 0x4000; code[1] |= i->getSrc(s)->reg.fileIndex << 10; setAddress16(i->src(s)); break; case FILE_IMMEDIATE: assert(s == 1 || i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2); assert(!(code[1] & 0xc000)); setImmediate(i, s); break; case FILE_GPR: if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst break; srcId(i->src(s), s ? ((s == 2) ? 
49 : s1) : 20); break; default: // ignore here, can be predicate or flags, but must not be address break; } } } void CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc) { code[0] = opc; code[1] = opc >> 32; emitPredicate(i); defId(i->def(0), 14); switch (i->src(0).getFile()) { case FILE_MEMORY_CONST: assert(!(code[1] & 0xc000)); code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10); setAddress16(i->src(0)); break; case FILE_IMMEDIATE: assert(!(code[1] & 0xc000)); setImmediate(i, 0); break; case FILE_GPR: srcId(i->src(0), 26); break; default: // ignore here, can be predicate or flags, but must not be address break; } } void CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred) { code[0] = opc; int ss2a = 0; if (opc == 0x0d || opc == 0x0e) ss2a = 2; defId(i->def(0), 14); srcId(i->src(0), 20); assert(pred || (i->predSrc < 0)); if (pred) emitPredicate(i); for (int s = 1; s < 3 && i->srcExists(s); ++s) { if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) { assert(!(code[0] & (0x300 >> ss2a))); switch (i->src(s).get()->reg.fileIndex) { case 0: code[0] |= 0x100 >> ss2a; break; case 1: code[0] |= 0x200 >> ss2a; break; case 16: code[0] |= 0x300 >> ss2a; break; default: ERROR("invalid c[] space for short form\n"); break; } if (s == 1) code[0] |= i->getSrc(s)->reg.data.offset << 24; else code[0] |= i->getSrc(s)->reg.data.offset << 6; } else if (i->src(s).getFile() == FILE_IMMEDIATE) { assert(s == 1); setImmediateS8(i->src(s)); } else if (i->src(s).getFile() == FILE_GPR) { srcId(i->src(s), (s == 1) ? 
26 : 8); } } } void CodeEmitterNVC0::emitShortSrc2(const ValueRef &src) { if (src.getFile() == FILE_MEMORY_CONST) { switch (src.get()->reg.fileIndex) { case 0: code[0] |= 0x100; break; case 1: code[0] |= 0x200; break; case 16: code[0] |= 0x300; break; default: assert(!"unsupported file index for short op"); break; } srcAddr32(src, 20); } else { srcId(src, 20); assert(src.getFile() == FILE_GPR); } } void CodeEmitterNVC0::emitNOP(const Instruction *i) { code[0] = 0x000001e4; code[1] = 0x40000000; emitPredicate(i); } void CodeEmitterNVC0::emitFMAD(const Instruction *i) { bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); if (i->encSize == 8) { if (isLIMM(i->src(1), TYPE_F32)) { emitForm_A(i, HEX64(20000000, 00000002)); } else { emitForm_A(i, HEX64(30000000, 00000000)); if (i->src(2).mod.neg()) code[0] |= 1 << 8; } roundMode_A(i); if (neg1) code[0] |= 1 << 9; if (i->saturate) code[0] |= 1 << 5; if (i->ftz) code[0] |= 1 << 6; } else { assert(!i->saturate && !i->src(2).mod.neg()); emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e, false); if (neg1) code[0] |= 1 << 4; } } void CodeEmitterNVC0::emitFMUL(const Instruction *i) { bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); assert(i->postFactor >= -3 && i->postFactor <= 3); if (i->encSize == 8) { if (isLIMM(i->src(1), TYPE_F32)) { assert(i->postFactor == 0); // constant folded, hopefully emitForm_A(i, HEX64(30000000, 00000002)); } else { emitForm_A(i, HEX64(58000000, 00000000)); roundMode_A(i); code[1] |= ((i->postFactor > 0) ? 
(7 - i->postFactor) : (0 - i->postFactor)) << 17; } if (neg) code[1] ^= 1 << 25; // aliases with LIMM sign bit if (i->saturate) code[0] |= 1 << 5; if (i->dnz) code[0] |= 1 << 7; else if (i->ftz) code[0] |= 1 << 6; } else { assert(!neg && !i->saturate && !i->ftz && !i->postFactor); emitForm_S(i, 0xa8, true); } } void CodeEmitterNVC0::emitUMUL(const Instruction *i) { if (i->encSize == 8) { if (i->src(1).getFile() == FILE_IMMEDIATE) { emitForm_A(i, HEX64(10000000, 00000002)); } else { emitForm_A(i, HEX64(50000000, 00000003)); } if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) code[0] |= 1 << 6; if (i->sType == TYPE_S32) code[0] |= 1 << 5; if (i->dType == TYPE_S32) code[0] |= 1 << 7; } else { emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true); if (i->sType == TYPE_S32) code[0] |= 1 << 6; } } void CodeEmitterNVC0::emitFADD(const Instruction *i) { if (i->encSize == 8) { if (isLIMM(i->src(1), TYPE_F32)) { assert(!i->saturate); emitForm_A(i, HEX64(28000000, 00000002)); code[0] |= i->src(0).mod.abs() << 7; code[0] |= i->src(0).mod.neg() << 9; if (i->src(1).mod.abs()) code[1] &= 0xfdffffff; if ((i->op == OP_SUB) != static_cast(i->src(1).mod.neg())) code[1] ^= 0x02000000; } else { emitForm_A(i, HEX64(50000000, 00000000)); roundMode_A(i); if (i->saturate) code[1] |= 1 << 17; emitNegAbs12(i); if (i->op == OP_SUB) code[0] ^= 1 << 8; } if (i->ftz) code[0] |= 1 << 5; } else { assert(!i->saturate && i->op != OP_SUB && !i->src(0).mod.abs() && !i->src(1).mod.neg() && !i->src(1).mod.abs()); emitForm_S(i, 0x49, true); if (i->src(0).mod.neg()) code[0] |= 1 << 7; } } void CodeEmitterNVC0::emitUADD(const Instruction *i) { uint32_t addOp = 0; assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); assert(!i->src(0).mod.neg() || !i->src(1).mod.neg()); if (i->src(0).mod.neg()) addOp |= 0x200; if (i->src(1).mod.neg()) addOp |= 0x100; if (i->op == OP_SUB) { addOp ^= 0x100; assert(addOp != 0x300); // would be add-plus-one } if (i->encSize == 8) { if (isLIMM(i->src(1), TYPE_U32)) { 
emitForm_A(i, HEX64(08000000, 00000002)); if (i->defExists(1)) code[1] |= 1 << 26; // write carry } else { emitForm_A(i, HEX64(48000000, 00000003)); if (i->defExists(1)) code[1] |= 1 << 16; // write carry } code[0] |= addOp; if (i->saturate) code[0] |= 1 << 5; if (i->flagsSrc >= 0) // add carry code[0] |= 1 << 6; } else { assert(!(addOp & 0x100)); emitForm_S(i, (addOp >> 3) | ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true); } } // TODO: shl-add void CodeEmitterNVC0::emitIMAD(const Instruction *i) { assert(i->encSize == 8); emitForm_A(i, HEX64(20000000, 00000003)); if (isSignedType(i->dType)) code[0] |= 1 << 7; if (isSignedType(i->sType)) code[0] |= 1 << 5; code[1] |= i->saturate << 24; if (i->flagsDef >= 0) code[1] |= 1 << 16; if (i->flagsSrc >= 0) code[1] |= 1 << 23; if (i->src(2).mod.neg()) code[0] |= 0x10; if (i->src(1).mod.neg() ^ i->src(0).mod.neg()) code[0] |= 0x20; if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) code[0] |= 1 << 6; } void CodeEmitterNVC0::emitNOT(Instruction *i) { assert(i->encSize == 8); i->setSrc(1, i->src(0)); emitForm_A(i, HEX64(68000000, 000001c3)); } void CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp) { if (i->encSize == 8) { if (isLIMM(i->src(1), TYPE_U32)) { emitForm_A(i, HEX64(38000000, 00000002)); if (i->srcExists(2)) code[1] |= 1 << 26; } else { emitForm_A(i, HEX64(68000000, 00000003)); if (i->srcExists(2)) code[1] |= 1 << 16; } code[0] |= subOp << 6; if (i->srcExists(2)) // carry code[0] |= 1 << 5; if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; } else { emitForm_S(i, (subOp << 5) | ((i->src(1).getFile() == FILE_IMMEDIATE) ? 
0x1d : 0x8d), true); } } void CodeEmitterNVC0::emitPOPC(const Instruction *i) { emitForm_A(i, HEX64(54000000, 00000004)); if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; } void CodeEmitterNVC0::emitINSBF(const Instruction *i) { emitForm_A(i, HEX64(28000000, 30000000)); } void CodeEmitterNVC0::emitShift(const Instruction *i) { if (i->op == OP_SHR) { emitForm_A(i, HEX64(58000000, 00000003) | (isSignedType(i->dType) ? 0x20 : 0x00)); } else { emitForm_A(i, HEX64(60000000, 00000003)); } if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) code[0] |= 1 << 9; } void CodeEmitterNVC0::emitPreOp(const Instruction *i) { if (i->encSize == 8) { emitForm_B(i, HEX64(60000000, 00000000)); if (i->op == OP_PREEX2) code[0] |= 0x20; if (i->src(0).mod.abs()) code[0] |= 1 << 6; if (i->src(0).mod.neg()) code[0] |= 1 << 8; } else { emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true); } } void CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp) { if (i->encSize == 8) { code[0] = 0x00000000 | (subOp << 26); code[1] = 0xc8000000; emitPredicate(i); defId(i->def(0), 14); srcId(i->src(0), 20); assert(i->src(0).getFile() == FILE_GPR); if (i->saturate) code[0] |= 1 << 5; if (i->src(0).mod.abs()) code[0] |= 1 << 7; if (i->src(0).mod.neg()) code[0] |= 1 << 9; } else { emitForm_S(i, 0x80000008 | (subOp << 26), true); assert(!i->src(0).mod.neg()); if (i->src(0).mod.abs()) code[0] |= 1 << 30; } } void CodeEmitterNVC0::emitMINMAX(const Instruction *i) { uint64_t op; assert(i->encSize == 8); op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL; if (i->ftz) op |= 1 << 5; else if (!isFloatType(i->dType)) op |= isSignedType(i->dType) ? 
0x23 : 0x03; emitForm_A(i, op); emitNegAbs12(i); } void CodeEmitterNVC0::roundMode_C(const Instruction *i) { switch (i->rnd) { case ROUND_M: code[1] |= 1 << 17; break; case ROUND_P: code[1] |= 2 << 17; break; case ROUND_Z: code[1] |= 3 << 17; break; case ROUND_NI: code[0] |= 1 << 7; break; case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break; case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break; case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break; case ROUND_N: break; default: assert(!"invalid round mode"); break; } } void CodeEmitterNVC0::roundMode_CS(const Instruction *i) { switch (i->rnd) { case ROUND_M: case ROUND_MI: code[0] |= 1 << 16; break; case ROUND_P: case ROUND_PI: code[0] |= 2 << 16; break; case ROUND_Z: case ROUND_ZI: code[0] |= 3 << 16; break; default: break; } } void CodeEmitterNVC0::emitCVT(Instruction *i) { const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); switch (i->op) { case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break; case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break; case OP_TRUNC: i->rnd = f2f ? 
ROUND_ZI : ROUND_Z; break; default: break; } const bool sat = (i->op == OP_SAT) || i->saturate; const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs(); const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg(); if (i->encSize == 8) { emitForm_B(i, HEX64(10000000, 00000004)); roundMode_C(i); // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size() code[0] |= util_logbase2(typeSizeof(i->dType)) << 20; code[0] |= util_logbase2(typeSizeof(i->sType)) << 23; if (sat) code[0] |= 0x20; if (abs) code[0] |= 1 << 6; if (neg && i->op != OP_ABS) code[0] |= 1 << 8; if (i->ftz) code[1] |= 1 << 23; if (isSignedIntType(i->dType)) code[0] |= 0x080; if (isSignedIntType(i->sType)) code[0] |= 0x200; if (isFloatType(i->dType)) { if (!isFloatType(i->sType)) code[1] |= 0x08000000; } else { if (isFloatType(i->sType)) code[1] |= 0x04000000; else code[1] |= 0x0c000000; } } else { if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) { code[0] = 0x298; } else if (isFloatType(i->dType)) { if (isFloatType(i->sType)) code[0] = 0x098; else code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0); } else { assert(isFloatType(i->sType)); code[0] = 0x288 | (isSignedType(i->sType) ? 
(1 << 8) : 0); } if (neg) code[0] |= 1 << 16; if (sat) code[0] |= 1 << 18; if (abs) code[0] |= 1 << 19; roundMode_CS(i); } } void CodeEmitterNVC0::emitSET(const CmpInstruction *i) { uint32_t hi; uint32_t lo = 0; if (i->sType == TYPE_F64) lo = 0x1; else if (!isFloatType(i->sType)) lo = 0x3; if (isFloatType(i->dType) || isSignedIntType(i->sType)) lo |= 0x20; switch (i->op) { case OP_SET_AND: hi = 0x10000000; break; case OP_SET_OR: hi = 0x10200000; break; case OP_SET_XOR: hi = 0x10400000; break; default: hi = 0x100e0000; break; } emitForm_A(i, (static_cast(hi) << 32) | lo); if (i->op != OP_SET) srcId(i->src(2), 32 + 17); if (i->def(0).getFile() == FILE_PREDICATE) { if (i->sType == TYPE_F32) code[1] += 0x10000000; else code[1] += 0x08000000; code[0] &= ~0xfc000; defId(i->def(0), 17); if (i->defExists(1)) defId(i->def(1), 14); else code[0] |= 0x1c000; } if (i->ftz) code[1] |= 1 << 27; emitCondCode(i->setCond, 32 + 23); emitNegAbs12(i); } void CodeEmitterNVC0::emitSLCT(const CmpInstruction *i) { uint64_t op; switch (i->dType) { case TYPE_S32: op = HEX64(30000000, 00000023); break; case TYPE_U32: op = HEX64(30000000, 00000003); break; case TYPE_F32: op = HEX64(38000000, 00000000); break; default: assert(!"invalid type for SLCT"); op = 0; break; } emitForm_A(i, op); CondCode cc = i->setCond; if (i->src(2).mod.neg()) cc = reverseCondCode(cc); emitCondCode(cc, 32 + 23); if (i->ftz) code[0] |= 1 << 5; } void CodeEmitterNVC0::emitSELP(const Instruction *i) { emitForm_A(i, HEX64(20000000, 00000004)); if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20; } void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) { code[0] = 0x00000086; code[1] = 0xd0000000; code[1] |= i->tex.r; code[1] |= i->tex.s << 8; if (i->tex.liveOnly) code[0] |= 1 << 9; defId(i->def(0), 14); srcId(i->src(0), 20); } void CodeEmitterNVC0::emitTEX(const TexInstruction *i) { code[0] = 0x00000006; if (1) code[0] |= 0x80; // normal/t/p mode = t, XXX: what is this ? 
if (i->tex.liveOnly) code[0] |= 1 << 9; switch (i->op) { case OP_TEX: code[1] = 0x80000000; break; case OP_TXB: code[1] = 0x84000000; break; case OP_TXL: code[1] = 0x86000000; break; case OP_TXF: code[1] = 0x90000000; break; case OP_TXG: code[1] = 0xa0000000; break; case OP_TXD: code[1] = 0xe0000000; break; default: assert(!"invalid texture op"); break; } if (i->op == OP_TXF) { if (!i->tex.levelZero) code[1] |= 0x02000000; } else if (i->tex.levelZero) { code[1] |= 0x02000000; } if (i->tex.derivAll) code[1] |= 1 << 13; defId(i->def(0), 14); srcId(i->src(0), 20); emitPredicate(i); if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5; code[1] |= i->tex.mask << 14; code[1] |= i->tex.r; code[1] |= i->tex.s << 8; if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) code[1] |= 1 << 18; // in 1st source (with array index) // texture target: code[1] |= (i->tex.target.getDim() - 1) << 20; if (i->tex.target.isCube()) code[1] += 2 << 20; if (i->tex.target.isArray()) code[1] |= 1 << 19; if (i->tex.target.isShadow()) code[1] |= 1 << 24; const int src1 = MAX2(i->predSrc + 1, 1); // if predSrc == 1, no 2nd src if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { // lzero if (i->op == OP_TXL) code[1] &= ~(1 << 26); else if (i->op == OP_TXF) code[1] &= ~(1 << 25); } if (i->tex.target == TEX_TARGET_2D_MS || i->tex.target == TEX_TARGET_2D_MS_ARRAY) code[1] |= 1 << 23; if (i->tex.useOffsets) // in vecSrc0.w code[1] |= 1 << 22; srcId(i, src1, 26); } void CodeEmitterNVC0::emitTXQ(const TexInstruction *i) { code[0] = 0x00000086; code[1] = 0xc0000000; switch (i->tex.query) { case TXQ_DIMS: code[1] |= 0 << 22; break; case TXQ_TYPE: code[1] |= 1 << 22; break; case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break; case TXQ_FILTER: code[1] |= 3 << 22; break; case TXQ_LOD: code[1] |= 4 << 22; break; case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break; default: assert(!"invalid texture query"); break; } code[1] |= i->tex.mask << 14; code[1] |= i->tex.r; code[1] |= i->tex.s << 
8; if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0) code[1] |= 1 << 18; defId(i->def(0), 14); srcId(i->src(0), 20); srcId(i->src(1), 26); emitPredicate(i); } void CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) { code[0] = 0x00000000 | (laneMask << 6); code[1] = 0x48000000 | qOp; defId(i->def(0), 14); srcId(i->src(0), 20); srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26); if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) code[0] |= 1 << 9; // dall emitPredicate(i); } void CodeEmitterNVC0::emitFlow(const Instruction *i) { const FlowInstruction *f = i->asFlow(); unsigned mask; // bit 0: predicate, bit 1: target code[0] = 0x00000007; switch (i->op) { case OP_BRA: code[1] = f->absolute ? 0x00000000 : 0x40000000; if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) code[0] |= 0x4000; mask = 3; break; case OP_CALL: code[1] = f->absolute ? 0x10000000 : 0x50000000; if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) code[0] |= 0x4000; mask = 2; break; case OP_EXIT: code[1] = 0x80000000; mask = 1; break; case OP_RET: code[1] = 0x90000000; mask = 1; break; case OP_DISCARD: code[1] = 0x98000000; mask = 1; break; case OP_BREAK: code[1] = 0xa8000000; mask = 1; break; case OP_CONT: code[1] = 0xb0000000; mask = 1; break; case OP_JOINAT: code[1] = 0x60000000; mask = 2; break; case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break; case OP_PRECONT: code[1] = 0x70000000; mask = 2; break; case OP_PRERET: code[1] = 0x78000000; mask = 2; break; case OP_QUADON: code[1] = 0xc0000000; mask = 0; break; case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break; case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break; default: assert(!"invalid flow operation"); return; } if (mask & 1) { emitPredicate(i); if (i->flagsSrc < 0) code[0] |= 0x1e0; } if (!f) return; if (f->allWarp) code[0] |= 1 << 15; if (f->limit) code[0] |= 1 << 16; if (f->op == OP_CALL) { if (f->builtin) { assert(f->absolute); uint32_t pcAbs = 
targ->getBuiltinOffset(f->target.builtin); addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26); addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6); } else { assert(!f->absolute); int32_t pcRel = f->target.fn->binPos - (codeSize + 8); code[0] |= (pcRel & 0x3f) << 26; code[1] |= (pcRel >> 6) & 0x3ffff; } } else if (mask & 2) { int32_t pcRel = f->target.bb->binPos - (codeSize + 8); // currently we don't want absolute branches assert(!f->absolute); code[0] |= (pcRel & 0x3f) << 26; code[1] |= (pcRel >> 6) & 0x3ffff; } } void CodeEmitterNVC0::emitPFETCH(const Instruction *i) { uint32_t prim = i->src(0).get()->reg.data.u32; code[0] = 0x00000006 | ((prim & 0x3f) << 26); code[1] = 0x00000000 | (prim >> 6); emitPredicate(i); defId(i->def(0), 14); srcId(i->src(1), 20); } void CodeEmitterNVC0::emitVFETCH(const Instruction *i) { code[0] = 0x00000006; code[1] = 0x06000000 | i->src(0).get()->reg.data.offset; if (i->perPatch) code[0] |= 0x100; if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads emitPredicate(i); code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5; defId(i->def(0), 14); srcId(i->src(0).getIndirect(0), 20); srcId(i->src(0).getIndirect(1), 26); // vertex address } void CodeEmitterNVC0::emitEXPORT(const Instruction *i) { unsigned int size = typeSizeof(i->dType); code[0] = 0x00000006 | ((size / 4 - 1) << 5); code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset; assert(!(code[1] & ((size == 12) ? 
15 : (size - 1)))); if (i->perPatch) code[0] |= 0x100; emitPredicate(i); assert(i->src(1).getFile() == FILE_GPR); srcId(i->src(0).getIndirect(0), 20); srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address srcId(i->src(1), 26); } void CodeEmitterNVC0::emitOUT(const Instruction *i) { code[0] = 0x00000006; code[1] = 0x1c000000; emitPredicate(i); defId(i->def(0), 14); // new secret address srcId(i->src(0), 20); // old secret address, should be 0 initially assert(i->src(0).getFile() == FILE_GPR); if (i->op == OP_EMIT) code[0] |= 1 << 5; if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART) code[0] |= 1 << 6; // vertex stream if (i->src(1).getFile() == FILE_IMMEDIATE) { code[1] |= 0xc000; code[0] |= SDATA(i->src(1)).u32 << 26; } else { srcId(i->src(1), 26); } } void CodeEmitterNVC0::emitInterpMode(const Instruction *i) { if (i->encSize == 8) { code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID } else { if (i->getInterpMode() == NV50_IR_INTERP_SC) code[0] |= 0x80; assert(i->op == OP_PINTERP && i->getSampleMode() == 0); } } void CodeEmitterNVC0::emitINTERP(const Instruction *i) { const uint32_t base = i->getSrc(0)->reg.data.offset; if (i->encSize == 8) { code[0] = 0x00000000; code[1] = 0xc0000000 | (base & 0xffff); if (i->saturate) code[0] |= 1 << 5; if (i->op == OP_PINTERP) srcId(i->src(1), 26); else code[0] |= 0x3f << 26; srcId(i->src(0).getIndirect(0), 20); } else { assert(i->op == OP_PINTERP); code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26); srcId(i->src(1), 20); } emitInterpMode(i); emitPredicate(i); defId(i->def(0), 14); if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) srcId(i->src(i->op == OP_PINTERP ? 
2 : 1), 17); else code[1] |= 0x3f << 17; } void CodeEmitterNVC0::emitLoadStoreType(DataType ty) { uint8_t val; switch (ty) { case TYPE_U8: val = 0x00; break; case TYPE_S8: val = 0x20; break; case TYPE_F16: case TYPE_U16: val = 0x40; break; case TYPE_S16: val = 0x60; break; case TYPE_F32: case TYPE_U32: case TYPE_S32: val = 0x80; break; case TYPE_F64: case TYPE_U64: case TYPE_S64: val = 0xa0; break; case TYPE_B128: val = 0xc0; break; default: val = 0x80; assert(!"invalid type"); break; } code[0] |= val; } void CodeEmitterNVC0::emitCachingMode(CacheMode c) { uint32_t val; switch (c) { case CACHE_CA: // case CACHE_WB: val = 0x000; break; case CACHE_CG: val = 0x100; break; case CACHE_CS: val = 0x200; break; case CACHE_CV: // case CACHE_WT: val = 0x300; break; default: val = 0; assert(!"invalid caching mode"); break; } code[0] |= val; } void CodeEmitterNVC0::emitSTORE(const Instruction *i) { uint32_t opc; switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: opc = 0x90000000; break; case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; case FILE_MEMORY_SHARED: opc = 0xc9000000; break; default: assert(!"invalid memory file"); opc = 0; break; } code[0] = 0x00000005; code[1] = opc; setAddress16(i->src(0)); srcId(i->src(1), 14); srcId(i->src(0).getIndirect(0), 20); emitPredicate(i); emitLoadStoreType(i->dType); emitCachingMode(i->cache); } void CodeEmitterNVC0::emitLOAD(const Instruction *i) { uint32_t opc; code[0] = 0x00000005; switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: opc = 0x80000000; break; case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; case FILE_MEMORY_SHARED: opc = 0xc1000000; break; case FILE_MEMORY_CONST: if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { emitMOV(i); // not sure if this is any better return; } opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10); code[0] = 0x00000006 | (i->subOp << 8); break; default: assert(!"invalid memory file"); opc = 0; break; } code[1] = opc; defId(i->def(0), 14); setAddress16(i->src(0)); 
srcId(i->src(0).getIndirect(0), 20); emitPredicate(i); emitLoadStoreType(i->dType); emitCachingMode(i->cache); } uint8_t CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref) { switch (SDATA(ref).sv.sv) { case SV_LANEID: return 0x00; case SV_PHYSID: return 0x03; case SV_VERTEX_COUNT: return 0x10; case SV_INVOCATION_ID: return 0x11; case SV_YDIR: return 0x12; case SV_TID: return 0x21 + SDATA(ref).sv.index; case SV_CTAID: return 0x25 + SDATA(ref).sv.index; case SV_NTID: return 0x29 + SDATA(ref).sv.index; case SV_GRIDID: return 0x2c; case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; case SV_LBASE: return 0x34; case SV_SBASE: return 0x30; case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; default: assert(!"no sreg for system value"); return 0; } } void CodeEmitterNVC0::emitMOV(const Instruction *i) { if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { uint8_t sr = getSRegEncoding(i->src(0)); if (i->encSize == 8) { code[0] = 0x00000004 | (sr << 26); code[1] = 0x2c000000; } else { code[0] = 0x40000008 | (sr << 20); } defId(i->def(0), 14); emitPredicate(i); } else if (i->encSize == 8) { uint64_t opc; if (i->src(0).getFile() == FILE_IMMEDIATE) opc = HEX64(18000000, 000001e2); else if (i->src(0).getFile() == FILE_PREDICATE) opc = HEX64(080e0000, 1c000004); else opc = HEX64(28000000, 00000004); opc |= i->lanes << 5; emitForm_B(i, opc); } else { uint32_t imm; if (i->src(0).getFile() == FILE_IMMEDIATE) { imm = SDATA(i->src(0)).u32; if (imm & 0xfff00000) { assert(!(imm & 0x000fffff)); code[0] = 0x00000318 | imm; } else { assert(imm < 0x800 || ((int32_t)imm >= -0x800)); code[0] = 0x00000118 | (imm << 20); } } else { code[0] = 0x0028; emitShortSrc2(i->src(0)); } defId(i->def(0), 14); emitPredicate(i); } } bool CodeEmitterNVC0::emitInstruction(Instruction *insn) { if (!insn->encSize) { ERROR("skipping unencodable instruction: "); insn->print(); return false; } else if (codeSize + insn->encSize > codeSizeLimit) { ERROR("code emitter output buffer too small\n"); return false; } // 
assert that instructions with multiple defs don't corrupt registers for (int d = 0; insn->defExists(d); ++d) assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); switch (insn->op) { case OP_MOV: case OP_RDSV: emitMOV(insn); break; case OP_NOP: break; case OP_LOAD: emitLOAD(insn); break; case OP_STORE: emitSTORE(insn); break; case OP_LINTERP: case OP_PINTERP: emitINTERP(insn); break; case OP_VFETCH: emitVFETCH(insn); break; case OP_EXPORT: emitEXPORT(insn); break; case OP_PFETCH: emitPFETCH(insn); break; case OP_EMIT: case OP_RESTART: emitOUT(insn); break; case OP_ADD: case OP_SUB: if (isFloatType(insn->dType)) emitFADD(insn); else emitUADD(insn); break; case OP_MUL: if (isFloatType(insn->dType)) emitFMUL(insn); else emitUMUL(insn); break; case OP_MAD: case OP_FMA: if (isFloatType(insn->dType)) emitFMAD(insn); else emitIMAD(insn); break; case OP_NOT: emitNOT(insn); break; case OP_AND: emitLogicOp(insn, 0); break; case OP_OR: emitLogicOp(insn, 1); break; case OP_XOR: emitLogicOp(insn, 2); break; case OP_SHL: case OP_SHR: emitShift(insn); break; case OP_SET: case OP_SET_AND: case OP_SET_OR: case OP_SET_XOR: emitSET(insn->asCmp()); break; case OP_SELP: emitSELP(insn); break; case OP_SLCT: emitSLCT(insn->asCmp()); break; case OP_MIN: case OP_MAX: emitMINMAX(insn); break; case OP_ABS: case OP_NEG: case OP_CEIL: case OP_FLOOR: case OP_TRUNC: case OP_CVT: case OP_SAT: emitCVT(insn); break; case OP_RSQ: emitSFnOp(insn, 5); break; case OP_RCP: emitSFnOp(insn, 4); break; case OP_LG2: emitSFnOp(insn, 3); break; case OP_EX2: emitSFnOp(insn, 2); break; case OP_SIN: emitSFnOp(insn, 1); break; case OP_COS: emitSFnOp(insn, 0); break; case OP_PRESIN: case OP_PREEX2: emitPreOp(insn); break; case OP_TEX: case OP_TXB: case OP_TXL: case OP_TXD: case OP_TXF: emitTEX(insn->asTex()); break; case OP_TXQ: emitTXQ(insn->asTex()); break; case OP_BRA: case OP_CALL: case OP_PRERET: case OP_RET: case OP_DISCARD: case OP_EXIT: case OP_PRECONT: case OP_CONT: case OP_PREBREAK: case 
OP_BREAK: case OP_JOINAT: case OP_BRKPT: case OP_QUADON: case OP_QUADPOP: emitFlow(insn); break; case OP_QUADOP: emitQUADOP(insn, insn->subOp, insn->lanes); break; case OP_DFDX: emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); break; case OP_DFDY: emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5); break; case OP_POPCNT: emitPOPC(insn); break; case OP_JOIN: emitNOP(insn); insn->join = 1; break; case OP_PHI: case OP_UNION: case OP_CONSTRAINT: ERROR("operation should have been eliminated"); return false; case OP_EXP: case OP_LOG: case OP_SQRT: case OP_POW: ERROR("operation should have been lowered\n"); return false; default: ERROR("unknow op\n"); return false; } if (insn->join) { code[0] |= 0x10; assert(insn->encSize == 8); } code += insn->encSize / 4; codeSize += insn->encSize; return true; } uint32_t CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const { const Target::OpInfo &info = targ->getOpInfo(i); if (info.minEncSize == 8 || 1) return 8; if (i->ftz || i->saturate || i->join) return 8; if (i->rnd != ROUND_N) return 8; if (i->predSrc >= 0 && i->op == OP_MAD) return 8; if (i->op == OP_PINTERP) { if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work return 8; } else if (i->op == OP_MOV && i->lanes != 0xf) { return 8; } for (int s = 0; i->srcExists(s); ++s) { if (i->src(s).isIndirect(0)) return 8; if (i->src(s).getFile() == FILE_MEMORY_CONST) { if (SDATA(i->src(s)).offset >= 0x100) return 8; if (i->getSrc(s)->reg.fileIndex > 1 && i->getSrc(s)->reg.fileIndex != 16) return 8; } else if (i->src(s).getFile() == FILE_IMMEDIATE) { if (i->dType == TYPE_F32) { if (SDATA(i->src(s)).u32 >= 0x100) return 8; } else { if (SDATA(i->src(s)).u32 > 0xff) return 8; } } if (i->op == OP_CVT) continue; if (i->src(s).mod != Modifier(0)) { if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS)) if (i->op != OP_RSQ) return 8; if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG)) if (i->op != OP_ADD || s != 0) return 8; } } return 4; } 
// Creates an emitter for 'target' with no output buffer attached yet.
//
// The class keeps its own TargetNVC0 pointer (targ), which
// getMinEncodingSize() dereferences, so it has to be set here; passing the
// target to the CodeEmitter base does not initialize this member.
// progType is left unset and must be supplied through setProgramType().
CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   : CodeEmitter(target),
     targ(target)
{
   code = NULL;
   codeSize = codeSizeLimit = 0;
   relocInfo = NULL;
}

// Allocates a new NVC0 code emitter for this target and sets its program
// type. The emitter is created with 'new' and returned as a raw pointer.
CodeEmitter *
TargetNVC0::getCodeEmitter(Program::Type type)
{
   CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
   emit->setProgramType(type);
   return emit;
}

} // namespace nv50_ir