summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvc0/codegen
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2011-09-14 16:18:23 +0200
committerChristoph Bumiller <[email protected]>2011-09-14 16:19:52 +0200
commit57594065c30feec9376be9b2132659f7d87362ee (patch)
tree7e6808e0c5240b513851b7925c5be6678663b5e5 /src/gallium/drivers/nvc0/codegen
parenta42eca84c56f6860e67c0c57f4765a5530cc5f81 (diff)
nv50/ir: import new shader backend code
Diffstat (limited to 'src/gallium/drivers/nvc0/codegen')
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp1714
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp705
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp568
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h46
4 files changed, 3033 insertions, 0 deletions
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
new file mode 100644
index 00000000000..2ab06f426e5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -0,0 +1,1714 @@
+
+#include "nv50_ir_target_nvc0.h"
+
+namespace nv50_ir {
+
+// Argh, all these assertions ...
+
+class CodeEmitterNVC0 : public CodeEmitter // instruction encoder; the emit* members below fill code[0] / code[1]
+{
+public:
+ CodeEmitterNVC0(const TargetNVC0 *);
+
+ virtual bool emitInstruction(Instruction *);
+ virtual uint32_t getMinEncodingSize(const Instruction *) const;
+
+ inline void setProgramType(Program::Type pType) { progType = pType; } // remembered in progType
+
+private:
+ const TargetNVC0 *targ; // emitFlow queries it for builtin offsets
+
+ Program::Type progType;
+
+private:
+ void emitForm_A(const Instruction *, uint64_t); // long form: opcode, predicate, def 0, up to 3 sources
+ void emitForm_B(const Instruction *, uint64_t); // long form: opcode, predicate, def 0, source 0 only
+ void emitForm_S(const Instruction *, uint32_t, bool pred); // short form, writes code[0] only; pred: also encode the predicate
+
+ void emitPredicate(const Instruction *);
+
+ void setAddress16(const ValueRef&); // symbol offset: low 6 bits to code[0], bits 6..15 to code[1]
+ void setImmediate(const Instruction *, const int s); // needs op already set
+ void setImmediateS8(const ValueRef&);
+
+ void emitCondCode(CondCode cc, int pos);
+ void emitInterpMode(const Instruction *);
+ void emitLoadStoreType(DataType ty);
+ void emitCachingMode(CacheMode c);
+
+ void emitShortSrc2(const ValueRef&);
+
+ inline uint8_t getSRegEncoding(const ValueRef&); // system value -> register number used by emitMOV
+
+ void roundMode_A(const Instruction *); // rounding field at code[1] bit 23
+ void roundMode_C(const Instruction *); // rounding field at code[1] bit 17, plus code[0] bit 7 for the *I modes
+ void roundMode_CS(const Instruction *); // rounding field at code[0] bit 16
+
+ void emitNegAbs12(const Instruction *); // abs/neg modifier bits of sources 0 and 1 (code[0] bits 6..9)
+
+ void emitNOP(const Instruction *);
+
+ void emitLOAD(const Instruction *);
+ void emitSTORE(const Instruction *);
+ void emitMOV(const Instruction *);
+
+ void emitINTERP(const Instruction *);
+ void emitPFETCH(const Instruction *);
+ void emitVFETCH(const Instruction *);
+ void emitEXPORT(const Instruction *);
+ void emitOUT(const Instruction *);
+
+ void emitUADD(const Instruction *);
+ void emitFADD(const Instruction *);
+ void emitUMUL(const Instruction *);
+ void emitFMUL(const Instruction *);
+ void emitIMAD(const Instruction *);
+ void emitFMAD(const Instruction *);
+
+ void emitNOT(Instruction *); // non-const: copies source 0 into source 1 before encoding
+ void emitLogicOp(const Instruction *, uint8_t subOp);
+ void emitPOPC(const Instruction *);
+ void emitINSBF(const Instruction *);
+ void emitShift(const Instruction *);
+
+ void emitSFnOp(const Instruction *, uint8_t subOp);
+
+ void emitCVT(Instruction *); // non-const: rewrites rnd for OP_CEIL / OP_FLOOR / OP_TRUNC
+ void emitMINMAX(const Instruction *);
+ void emitPreOp(const Instruction *);
+
+ void emitSET(const CmpInstruction *);
+ void emitSLCT(const CmpInstruction *);
+ void emitSELP(const Instruction *);
+
+ void emitTEX(const TexInstruction *);
+ void emitTEXCSAA(const TexInstruction *);
+ void emitTXQ(const TexInstruction *);
+ void emitPIXLD(const TexInstruction *);
+
+ void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
+
+ void emitFlow(const Instruction *);
+
+ inline void defId(const ValueDef&, const int pos); // pos is a bit position: word pos / 32, bit pos % 32
+ inline void srcId(const ValueRef&, const int pos); // same addressing as defId; 63 is written when there is no value
+
+ inline void srcAddr32(const ValueRef&, const int pos); // address / 4
+
+ inline void srcId(const ValueRef *, const int pos); // pointer variant: a null pointer encodes as 63
+
+ inline bool isLIMM(const ValueRef&, DataType ty);
+};
+
+// for better visibility: HEX64(h, l) pastes two hex digit groups into one ULL literal; with an 8-digit l, h is the upper 32 bits
+#define HEX64(h, l) 0x##h##l##ULL
+
+#define SDATA(a) ((a).rep()->reg.data) // reg.data of a source reference, reached through rep()
+#define DDATA(a) ((a).rep()->reg.data) // same expansion, used for definitions
+
+// Writes the register id of a source operand at bit position pos of the
+// instruction (word pos / 32, bit pos % 32). A reference that has no value
+// attached is written as id 63.
+void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
+{
+   const int word = pos / 32;
+   const int shift = pos % 32;
+
+   code[word] |= (src.get() ? SDATA(src).id : 63) << shift;
+}
+
+// Pointer flavour of srcId: a null pointer is written as id 63, otherwise
+// the id of the referenced value goes to bit position pos.
+void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
+{
+   const int word = pos / 32;
+   const int shift = pos % 32;
+
+   code[word] |= (src ? SDATA(*src).id : 63) << shift;
+}
+
+// Writes the operand's offset divided by four (offset >> 2) at bit
+// position pos of the instruction.
+void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
+{
+   const int word = pos / 32;
+   const int shift = pos % 32;
+
+   code[word] |= (SDATA(src).offset >> 2) << shift;
+}
+
+// Writes the register id of a definition at bit position pos of the
+// instruction. A definition without a value is written as id 63.
+void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
+{
+   const int word = pos / 32;
+   const int shift = pos % 32;
+
+   code[word] |= (def.get() ? DDATA(def).id : 63) << shift;
+}
+
+// Returns true when ref is an immediate that has bits set in the tested
+// range: the low 12 bits for TYPE_F32, the high 12 bits for any other type.
+// Callers pick the LIMM opcode variant when this returns true.
+bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
+{
+   const ImmediateValue *imm = ref.get()->asImm();
+
+   if (!imm)
+      return false;
+
+   if (ty == TYPE_F32)
+      return (imm->reg.data.u32 & 0xfff) != 0;
+
+   return (imm->reg.data.u32 & 0xfff00000) != 0;
+}
+
+// Encodes the rounding mode as a 2-bit field at bit 23 of code[1].
+// ROUND_N is the all-zero encoding; any mode other than M, P, Z or N trips
+// the assertion.
+void
+CodeEmitterNVC0::roundMode_A(const Instruction *insn)
+{
+   uint32_t field = 0;
+
+   switch (insn->rnd) {
+   case ROUND_M:
+      field = 1;
+      break;
+   case ROUND_P:
+      field = 2;
+      break;
+   case ROUND_Z:
+      field = 3;
+      break;
+   default:
+      assert(insn->rnd == ROUND_N);
+      break;
+   }
+
+   code[1] |= field << 23;
+}
+
+// Sets the modifier bits of the first two sources in code[0]:
+// bit 6 = |src1|, bit 7 = |src0|, bit 8 = -src1, bit 9 = -src0.
+void
+CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
+{
+   uint32_t mods = 0;
+
+   if (i->src[1].mod.abs())
+      mods |= 1 << 6;
+   if (i->src[0].mod.abs())
+      mods |= 1 << 7;
+   if (i->src[1].mod.neg())
+      mods |= 1 << 8;
+   if (i->src[0].mod.neg())
+      mods |= 1 << 9;
+
+   code[0] |= mods;
+}
+
+// Translates a condition code into its numeric encoding and ORs it into the
+// instruction at bit position pos. In the first group the *U variants are
+// the plain comparison encoding with bit 3 set; the second group (0x10 and
+// up) covers the A/S/C/O codes and their negations. An unknown code asserts
+// and encodes as 0.
+void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
+{
+   uint8_t enc;
+
+   switch (cc) {
+   case CC_FL:  enc = 0x0; break;
+   case CC_LT:  enc = 0x1; break;
+   case CC_EQ:  enc = 0x2; break;
+   case CC_LE:  enc = 0x3; break;
+   case CC_GT:  enc = 0x4; break;
+   case CC_NE:  enc = 0x5; break;
+   case CC_GE:  enc = 0x6; break;
+   case CC_LTU: enc = 0x9; break;
+   case CC_EQU: enc = 0xa; break;
+   case CC_LEU: enc = 0xb; break;
+   case CC_GTU: enc = 0xc; break;
+   case CC_NEU: enc = 0xd; break;
+   case CC_GEU: enc = 0xe; break;
+   case CC_TR:  enc = 0xf; break;
+
+   case CC_NO:  enc = 0x10; break;
+   case CC_NC:  enc = 0x11; break;
+   case CC_NS:  enc = 0x12; break;
+   case CC_NA:  enc = 0x13; break;
+   case CC_A:   enc = 0x14; break;
+   case CC_S:   enc = 0x15; break;
+   case CC_C:   enc = 0x16; break;
+   case CC_O:   enc = 0x17; break;
+
+   default:
+      enc = 0;
+      assert(!"invalid condition code");
+      break;
+   }
+
+   const int word = pos / 32;
+   const int shift = pos % 32;
+
+   code[word] |= enc << shift;
+}
+
+// Fills the predicate field that starts at bit 10 of code[0]. Without a
+// predicate source the field is set to 7 (0x1c00 == 7 << 10). With one, the
+// predicate register id is written and bit 13 marks a negated predicate.
+void
+CodeEmitterNVC0::emitPredicate(const Instruction *i)
+{
+   if (i->predSrc < 0) {
+      code[0] |= 0x1c00;
+      return;
+   }
+
+   assert(i->getPredicate()->reg.file == FILE_PREDICATE);
+   srcId(i->src[i->predSrc], 10);
+
+   if (i->cc == CC_NOT_P)
+      code[0] |= 0x2000; // negate
+}
+
+// Stores a 16-bit symbol offset split across both instruction words:
+// offset bits 0..5 go to code[0] bits 26..31 and offset bits 6..15 go to
+// code[1] bits 0..9. The operand must be a symbol.
+void
+CodeEmitterNVC0::setAddress16(const ValueRef& src)
+{
+   Symbol *symbol = src.get()->asSym();
+   assert(symbol);
+
+   // low six bits
+   code[0] |= (symbol->reg.data.offset & 0x003f) << 26;
+   // remaining ten bits
+   code[1] |= (symbol->reg.data.offset & 0xffc0) >> 6;
+}
+
+// Encodes source s, which must be an immediate, into the instruction. The
+// layout depends on the low opcode nibble already stored in code[0], so the
+// opcode has to be written before this is called:
+//   0x2      LIMM, the full 32-bit value is stored
+//   0x3, 0x4 integer, only the low 20 bits are stored (the value must be a
+//            sign- or zero-extended 20-bit number)
+//   other    float, the top 20 bits are stored (the low 12 bits must be 0)
+// The two short layouts also set 0xc000 in code[1].
+void
+CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
+{
+   const ImmediateValue *imm = i->src[s].get()->asImm();
+   assert(imm);
+
+   uint32_t value = imm->reg.data.u32;
+   const uint32_t opForm = code[0] & 0xf;
+
+   if (opForm == 0x2) {
+      // LIMM
+      code[0] |= (value & 0x3f) << 26;
+      code[1] |= value >> 6;
+      return;
+   }
+
+   if (opForm == 0x3 || opForm == 4) {
+      // integer immediate
+      assert((value & 0xfff00000) == 0 || (value & 0xfff00000) == 0xfff00000);
+      assert(!(code[1] & 0xc000));
+      value &= 0xfffff;
+      code[0] |= (value & 0x3f) << 26;
+      code[1] |= 0xc000 | (value >> 6);
+      return;
+   }
+
+   // float immediate
+   assert(!(value & 0x00000fff));
+   assert(!(code[1] & 0xc000));
+   code[0] |= ((value >> 12) & 0x3f) << 26;
+   code[1] |= 0xc000 | (value >> 18);
+}
+
+// Encodes a signed 8-bit immediate for the short instruction form: the low
+// six bits go to code[0] bits 26..31, the top two bits to code[0] bits 8..9.
+// The operand must be an immediate whose value fits in int8_t.
+void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
+{
+   const ImmediateValue *imm = ref.get()->asImm();
+
+   assert(imm);
+
+   int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
+
+   assert(s8 == imm->reg.data.s32);
+
+   // Work on the unsigned bit pattern: shifting a negative s8 right keeps
+   // the sign, and shifting that left again would set every bit from 8
+   // upwards (predicate, destination and source fields), not just the two
+   // bits that belong to the immediate.
+   const uint32_t bits = static_cast<uint8_t>(s8);
+
+   code[0] |= (bits & 0x3f) << 26;
+   code[0] |= (bits >> 6) << 8;
+}
+
+// Common encoder for the long (two-word) form A: stores the 64-bit opcode,
+// the predicate, destination 0 at bit 14 and up to three sources.
+//   constant buffer source: selector bit in code[1] (0x8000 for source 2,
+//                           0x4000 otherwise), file index at bit 10 and the
+//                           16-bit address
+//   immediate source:       handled by setImmediate
+//   register source:        id at bit 20 (source 0), s1 (source 1) or
+//                           bit 49 (source 2)
+// Sources from any other file are skipped here.
+void
+CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
+{
+   code[0] = opc;
+   code[1] = opc >> 32;
+
+   emitPredicate(i);
+   defId(i->def[0], 14);
+
+   // Source 1 sits at bit 26 unless source 2 is a constant buffer operand,
+   // in which case it is placed at bit 49.
+   const bool src2IsConst =
+      i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST;
+   const int s1 = src2IsConst ? 49 : 26;
+
+   for (int s = 0; s < 3; ++s) {
+      if (!i->srcExists(s))
+         break;
+
+      switch (i->getSrc(s)->reg.file) {
+      case FILE_GPR:
+         // LIMM: 3rd src == dst
+         if (s == 2 && (code[0] & 0x7) == 2)
+            break;
+         if (s == 0)
+            srcId(i->src[s], 20);
+         else
+            srcId(i->src[s], (s == 2) ? 49 : s1);
+         break;
+      case FILE_IMMEDIATE:
+         assert(s == 1 ||
+                i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
+         assert(!(code[1] & 0xc000));
+         setImmediate(i, s);
+         break;
+      case FILE_MEMORY_CONST:
+         assert(!(code[1] & 0xc000));
+         code[1] |= (s == 2) ? 0x8000 : 0x4000;
+         code[1] |= i->getSrc(s)->reg.fileIndex << 10;
+         setAddress16(i->src[s]);
+         break;
+      default:
+         // ignore here, can be predicate or flags, but must not be address
+         break;
+      }
+   }
+}
+
+// Common encoder for the long form B: stores the 64-bit opcode, the
+// predicate, destination 0 at bit 14 and only source 0. A register source
+// goes to bit 26, a constant buffer source sets 0x4000 plus the file index
+// and address, an immediate goes through setImmediate.
+void
+CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
+{
+   code[0] = opc;
+   code[1] = opc >> 32;
+
+   emitPredicate(i);
+   defId(i->def[0], 14);
+
+   switch (i->src[0].getFile()) {
+   case FILE_GPR:
+      srcId(i->src[0], 26);
+      break;
+   case FILE_IMMEDIATE:
+      assert(!(code[1] & 0xc000));
+      setImmediate(i, 0);
+      break;
+   case FILE_MEMORY_CONST:
+      assert(!(code[1] & 0xc000));
+      code[1] |= 0x4000 | (i->src[0].get()->reg.fileIndex << 10);
+      setAddress16(i->src[0]);
+      break;
+   default:
+      // ignore here, can be predicate or flags, but must not be address
+      break;
+   }
+}
+
+// Common encoder for the short form, which only writes code[0]: opcode,
+// destination 0 at bit 14, source 0 at bit 20, optionally the predicate,
+// then sources 1 and 2. For opcodes 0x0d and 0x0e the constant buffer
+// selector bits are shifted down by two positions. Only c[] spaces 0, 1 and
+// 16 can be selected; anything else is reported through ERROR.
+void
+CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
+{
+   code[0] = opc;
+
+   const int ss2a = (opc == 0x0d || opc == 0x0e) ? 2 : 0;
+
+   defId(i->def[0], 14);
+   srcId(i->src[0], 20);
+
+   // a predicated instruction must be encoded with pred == true
+   assert(pred || (i->predSrc < 0));
+   if (pred)
+      emitPredicate(i);
+
+   for (int s = 1; s < 3; ++s) {
+      if (!i->srcExists(s))
+         break;
+
+      if (i->src[s].get()->reg.file == FILE_MEMORY_CONST) {
+         assert(!(code[0] & (0x300 >> ss2a)));
+
+         switch (i->src[s].get()->reg.fileIndex) {
+         case 0:
+            code[0] |= 0x100 >> ss2a;
+            break;
+         case 1:
+            code[0] |= 0x200 >> ss2a;
+            break;
+         case 16:
+            code[0] |= 0x300 >> ss2a;
+            break;
+         default:
+            ERROR("invalid c[] space for short form\n");
+            break;
+         }
+
+         // the offset goes to bit 24 for source 1, bit 6 for source 2
+         if (s == 1)
+            code[0] |= i->getSrc(s)->reg.data.offset << 24;
+         else
+            code[0] |= i->getSrc(s)->reg.data.offset << 6;
+      } else if (i->src[s].getFile() == FILE_IMMEDIATE) {
+         assert(s == 1);
+         setImmediateS8(i->src[s]);
+      } else if (i->src[s].getFile() == FILE_GPR) {
+         srcId(i->src[s], (s == 1) ? 26 : 8);
+      }
+   }
+}
+
+// Encodes one operand of a short instruction at bit 20 of code[0]. A
+// constant buffer operand sets the c[] space selector (0x100, 0x200 or
+// 0x300 for spaces 0, 1 and 16) and stores offset / 4; anything else is
+// expected to be a general purpose register.
+void
+CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
+{
+   if (src.getFile() != FILE_MEMORY_CONST) {
+      srcId(src, 20);
+      assert(src.getFile() == FILE_GPR);
+      return;
+   }
+
+   switch (src.get()->reg.fileIndex) {
+   case 0:
+      code[0] |= 0x100;
+      break;
+   case 1:
+      code[0] |= 0x200;
+      break;
+   case 16:
+      code[0] |= 0x300;
+      break;
+   default:
+      assert(!"unsupported file index for short op");
+      break;
+   }
+
+   srcAddr32(src, 20);
+}
+
+// Emits the fixed two-word NOP pattern and then adds the instruction's
+// predicate.
+void
+CodeEmitterNVC0::emitNOP(const Instruction *i)
+{
+   code[1] = 0x40000000;
+   code[0] = 0x000001e4;
+
+   emitPredicate(i);
+}
+
+// Encodes a float multiply-add. The product is negated when exactly one of
+// the two factors carries a neg modifier. The short form can express
+// neither saturation nor a negated addend.
+void
+CodeEmitterNVC0::emitFMAD(const Instruction *i)
+{
+   const bool negProduct = (i->src[0].mod ^ i->src[1].mod).neg();
+
+   if (i->encSize != 8) {
+      // short form
+      assert(!i->saturate && !i->src[2].mod.neg());
+
+      const uint32_t opc =
+         (i->src[2].getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e;
+      emitForm_S(i, opc, false);
+
+      if (negProduct)
+         code[0] |= 1 << 4;
+      return;
+   }
+
+   if (isLIMM(i->src[1], TYPE_F32)) {
+      emitForm_A(i, HEX64(20000000, 00000002));
+   } else {
+      emitForm_A(i, HEX64(30000000, 00000000));
+
+      // the negated addend flag is only set in the non-LIMM variant
+      if (i->src[2].mod.neg())
+         code[0] |= 1 << 8;
+   }
+   roundMode_A(i);
+
+   if (negProduct)
+      code[0] |= 1 << 9;
+   if (i->saturate)
+      code[0] |= 1 << 5;
+   if (i->ftz)
+      code[0] |= 1 << 6;
+}
+
+// Encodes a float multiply. postFactor must lie in [-3, 3]; it is stored at
+// bit 17 of code[1] as 7 - postFactor when positive and as -postFactor
+// otherwise. The short form supports none of neg, saturate, ftz or
+// postFactor.
+void
+CodeEmitterNVC0::emitFMUL(const Instruction *i)
+{
+   const bool negResult = (i->src[0].mod ^ i->src[1].mod).neg();
+
+   assert(i->postFactor >= -3 && i->postFactor <= 3);
+
+   if (i->encSize != 8) {
+      assert(!negResult && !i->saturate && !i->ftz && !i->postFactor);
+      emitForm_S(i, 0xa8, true);
+      return;
+   }
+
+   if (isLIMM(i->src[1], TYPE_F32)) {
+      assert(i->postFactor == 0); // constant folded, hopefully
+      emitForm_A(i, HEX64(30000000, 00000002));
+   } else {
+      emitForm_A(i, HEX64(58000000, 00000000));
+      roundMode_A(i);
+      code[1] |= ((i->postFactor > 0) ?
+                  (7 - i->postFactor) : (0 - i->postFactor)) << 17;
+   }
+
+   if (negResult)
+      code[1] ^= 1 << 25; // aliases with LIMM sign bit
+
+   if (i->saturate)
+      code[0] |= 1 << 5;
+
+   // dnz takes precedence over ftz
+   if (i->dnz)
+      code[0] |= 1 << 7;
+   else if (i->ftz)
+      code[0] |= 1 << 6;
+}
+
+// Encodes an integer multiply. The long form uses a separate opcode when
+// source 1 is an immediate and carries flags for MUL_HIGH (bit 6), a signed
+// source type (bit 5) and a signed destination type (bit 7). The short form
+// only flags a signed source type (bit 6).
+void
+CodeEmitterNVC0::emitUMUL(const Instruction *i)
+{
+   const bool immSrc1 = i->src[1].getFile() == FILE_IMMEDIATE;
+
+   if (i->encSize != 8) {
+      emitForm_S(i, immSrc1 ? 0xaa : 0x2a, true);
+
+      if (i->sType == TYPE_S32)
+         code[0] |= 1 << 6;
+      return;
+   }
+
+   if (immSrc1)
+      emitForm_A(i, HEX64(10000000, 00000002));
+   else
+      emitForm_A(i, HEX64(50000000, 00000003));
+
+   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+      code[0] |= 1 << 6;
+   if (i->sType == TYPE_S32)
+      code[0] |= 1 << 5;
+   if (i->dType == TYPE_S32)
+      code[0] |= 1 << 7;
+}
+
+// Encodes a float add or subtract. In the long form OP_SUB is expressed by
+// flipping the negate bit of source 1 (bit 8). The short form only supports
+// a plain add whose sole allowed modifier is a negated source 0.
+void
+CodeEmitterNVC0::emitFADD(const Instruction *i)
+{
+   if (i->encSize != 8) {
+      assert(!i->saturate && i->op != OP_SUB &&
+             !i->src[0].mod.abs() &&
+             !i->src[1].mod.neg() && !i->src[1].mod.abs());
+
+      emitForm_S(i, 0x49, true);
+
+      if (i->src[0].mod.neg())
+         code[0] |= 1 << 7;
+      return;
+   }
+
+   if (isLIMM(i->src[1], TYPE_F32)) {
+      emitForm_A(i, HEX64(28000000, 00000002));
+
+      // the LIMM variant takes no modifiers on the immediate, no saturate
+      assert(!i->src[1].mod.neg() && !i->src[1].mod.abs() && !i->saturate);
+   } else {
+      emitForm_A(i, HEX64(50000000, 00000000));
+
+      roundMode_A(i);
+      if (i->saturate)
+         code[1] |= 1 << 17;
+   }
+   emitNegAbs12(i);
+
+   if (i->op == OP_SUB)
+      code[0] ^= 1 << 8;
+
+   if (i->ftz)
+      code[0] |= 1 << 5;
+}
+
+// Encodes an integer add or subtract. Negation is collected in addOp:
+// 0x200 negates source 0, 0x100 negates source 1, and OP_SUB toggles the
+// source 1 bit. abs modifiers and negating both sources are not allowed.
+// The short form cannot negate source 1.
+void
+CodeEmitterNVC0::emitUADD(const Instruction *i)
+{
+   assert(!i->src[0].mod.abs() && !i->src[1].mod.abs());
+   assert(!i->src[0].mod.neg() || !i->src[1].mod.neg());
+
+   uint32_t addOp = 0;
+
+   if (i->src[0].mod.neg())
+      addOp |= 0x200;
+   if (i->src[1].mod.neg())
+      addOp |= 0x100;
+   if (i->op == OP_SUB) {
+      addOp ^= 0x100;
+      assert(addOp != 0x300); // would be add-plus-one
+   }
+
+   if (i->encSize != 8) {
+      assert(!(addOp & 0x100));
+
+      const uint32_t base =
+         (i->src[1].getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c;
+      emitForm_S(i, (addOp >> 3) | base, true);
+      return;
+   }
+
+   // the carry-out flag sits at a different bit in the LIMM variant
+   if (isLIMM(i->src[1], TYPE_U32)) {
+      emitForm_A(i, HEX64(08000000, 00000002));
+      if (i->def[1].exists())
+         code[1] |= 1 << 26; // write carry
+   } else {
+      emitForm_A(i, HEX64(48000000, 00000003));
+      if (i->def[1].exists())
+         code[1] |= 1 << 16; // write carry
+   }
+   code[0] |= addOp;
+
+   if (i->saturate)
+      code[0] |= 1 << 5;
+   if (i->flagsSrc >= 0) // add carry
+      code[0] |= 1 << 6;
+}
+
+// TODO: shl-add
+//
+// Encodes an integer multiply-add (long form only).
+//   code[0]: bit 5 signed source type, bit 6 MUL_HIGH, bit 7 signed
+//            destination type, bit 8 negated addend (source 2), bit 9
+//            negated product (exactly one factor carries neg)
+//   code[1]: bit 16 flags written, bit 23 flags read, bit 24 saturate
+void
+CodeEmitterNVC0::emitIMAD(const Instruction *i)
+{
+   assert(i->encSize == 8);
+   emitForm_A(i, HEX64(20000000, 00000003));
+
+   if (isSignedType(i->dType))
+      code[0] |= 1 << 7;
+   if (isSignedType(i->sType))
+      code[0] |= 1 << 5;
+
+   code[1] |= i->saturate << 24;
+
+   if (i->flagsDef >= 0) code[1] |= 1 << 16;
+   if (i->flagsSrc >= 0) code[1] |= 1 << 23;
+
+   // The negate flags use bits 8 and 9, the same positions emitUADD uses
+   // (0x100 / 0x200). The previous values 0x10 / 0x20 collided with the
+   // opcode bits and with the signed source type flag (1 << 5) set above.
+   // NOTE(review): confirm against the ISA that both flags may be set at
+   // once; emitUADD treats the combination 0x300 as "add-plus-one".
+   if (i->src[2].mod.neg())
+      code[0] |= 0x100;
+   if (i->src[1].mod.neg() ^ i->src[0].mod.neg())
+      code[0] |= 0x200;
+
+   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+      code[0] |= 1 << 6;
+}
+
+// Encodes a bitwise NOT through the logic-op opcode: source 0 is first
+// copied into source 1 (which is why the instruction is not const), then
+// the instruction is emitted as a long form A.
+void
+CodeEmitterNVC0::emitNOT(Instruction *i)
+{
+   assert(i->encSize == 8);
+
+   // duplicate the operand into the second source slot
+   i->src[1].set(i->src[0]);
+
+   emitForm_A(i, HEX64(68000000, 000001c3));
+}
+
+// Encodes a two-source logic operation; subOp selects the operation. In
+// the long form subOp is stored at bit 6, an existing third source (carry)
+// sets bit 5 plus a variant-specific bit in code[1], and NOT modifiers on
+// sources 0 and 1 set bits 9 and 8. The short form folds subOp into the
+// opcode.
+void
+CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
+{
+   if (i->encSize != 8) {
+      const uint32_t base =
+         (i->src[1].getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d;
+      emitForm_S(i, (subOp << 5) | base, true);
+      return;
+   }
+
+   if (isLIMM(i->src[1], TYPE_U32)) {
+      emitForm_A(i, HEX64(38000000, 00000002));
+
+      if (i->src[2].exists())
+         code[1] |= 1 << 26;
+   } else {
+      emitForm_A(i, HEX64(68000000, 00000003));
+
+      if (i->src[2].exists())
+         code[1] |= 1 << 16;
+   }
+
+   code[0] |= subOp << 6;
+
+   if (i->src[2].exists()) // carry
+      code[0] |= 1 << 5;
+
+   if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT))
+      code[0] |= 1 << 9;
+   if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT))
+      code[0] |= 1 << 8;
+}
+
+// Encodes a population count as a long form A. NOT modifiers on sources 0
+// and 1 are flagged in bits 9 and 8 of code[0].
+void
+CodeEmitterNVC0::emitPOPC(const Instruction *i)
+{
+   emitForm_A(i, HEX64(54000000, 00000004));
+
+   if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT))
+      code[0] |= 1 << 9;
+
+   if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT))
+      code[0] |= 1 << 8;
+}
+
+// Encodes a bitfield insert; it is a plain long form A with no extra flags.
+void
+CodeEmitterNVC0::emitINSBF(const Instruction *i)
+{
+   const uint64_t opc = HEX64(28000000, 30000000);
+
+   emitForm_A(i, opc);
+}
+
+// Encodes a shift as a long form A. OP_SHR has its own opcode and sets
+// 0x20 for a signed destination type; every other op handled here uses the
+// second opcode. The clamp flag is never set.
+void
+CodeEmitterNVC0::emitShift(const Instruction *i)
+{
+   uint64_t opc;
+
+   if (i->op == OP_SHR) {
+      opc = HEX64(58000000, 00000003)
+          | (isSignedType(i->dType) ? 0x20 : 0x00);
+   } else {
+      opc = HEX64(60000000, 00000003);
+   }
+   emitForm_A(i, opc);
+
+   if (0)
+      code[0] |= 1 << 9; // clamp shift amount
+}
+
+// Encodes the pre-operations (OP_PREEX2 versus the other pre-op). The long
+// form marks OP_PREEX2 with 0x20 and supports abs (bit 6) and neg (bit 8)
+// on the source; the short form selects between two fixed opcodes.
+void
+CodeEmitterNVC0::emitPreOp(const Instruction *i)
+{
+   const bool isEx2 = (i->op == OP_PREEX2);
+
+   if (i->encSize != 8) {
+      emitForm_S(i, isEx2 ? 0x74000008 : 0x70000008, true);
+      return;
+   }
+
+   emitForm_B(i, HEX64(60000000, 00000000));
+
+   if (isEx2)
+      code[0] |= 0x20;
+
+   if (i->src[0].mod.abs())
+      code[0] |= 1 << 6;
+   if (i->src[0].mod.neg())
+      code[0] |= 1 << 8;
+}
+
+// Encodes a special-function operation; subOp selects the function and is
+// stored at bit 26 of code[0]. The long form takes a register source with
+// optional abs (bit 7) and neg (bit 9) and supports saturate (bit 5). The
+// short form supports abs only (bit 30).
+void
+CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
+{
+   if (i->encSize != 8) {
+      emitForm_S(i, 0x80000008 | (subOp << 26), true);
+
+      assert(!i->src[0].mod.neg());
+      if (i->src[0].mod.abs())
+         code[0] |= 1 << 30;
+      return;
+   }
+
+   code[0] = 0x00000000 | (subOp << 26);
+   code[1] = 0xc8000000;
+
+   emitPredicate(i);
+
+   defId(i->def[0], 14);
+   srcId(i->src[0], 20);
+
+   assert(i->src[0].getFile() == FILE_GPR);
+
+   if (i->saturate)
+      code[0] |= 1 << 5;
+
+   if (i->src[0].mod.abs())
+      code[0] |= 1 << 7;
+   if (i->src[0].mod.neg())
+      code[0] |= 1 << 9;
+}
+
+// Encodes OP_MIN / OP_MAX (long form only). The two ops differ in one bit
+// of the upper opcode word. The low bits carry either the ftz flag (bit 5)
+// or, for non-float types, the integer opcode bits 0x03 with 0x20 added
+// for signed types.
+void
+CodeEmitterNVC0::emitMINMAX(const Instruction *i)
+{
+   assert(i->encSize == 8);
+
+   uint64_t opc;
+   if (i->op == OP_MIN)
+      opc = 0x080e000000000000ULL;
+   else
+      opc = 0x081e000000000000ULL;
+
+   if (i->ftz) {
+      opc |= 1 << 5;
+   } else if (!isFloatType(i->dType)) {
+      opc |= isSignedType(i->dType) ? 0x23 : 0x03;
+   }
+
+   emitForm_A(i, opc);
+   emitNegAbs12(i);
+}
+
+// Encodes the rounding mode for conversions: the direction (N = 0, M = 1,
+// P = 2, Z = 3) is a 2-bit field at bit 17 of code[1], and the *I variants
+// of each mode additionally set bit 7 of code[0]. Any other mode asserts.
+void
+CodeEmitterNVC0::roundMode_C(const Instruction *i)
+{
+   uint32_t dir = 0;
+   bool iVariant = false;
+
+   switch (i->rnd) {
+   case ROUND_N:
+      break;
+   case ROUND_M:
+      dir = 1;
+      break;
+   case ROUND_P:
+      dir = 2;
+      break;
+   case ROUND_Z:
+      dir = 3;
+      break;
+   case ROUND_NI:
+      iVariant = true;
+      break;
+   case ROUND_MI:
+      iVariant = true;
+      dir = 1;
+      break;
+   case ROUND_PI:
+      iVariant = true;
+      dir = 2;
+      break;
+   case ROUND_ZI:
+      iVariant = true;
+      dir = 3;
+      break;
+   default:
+      assert(!"invalid round mode");
+      break;
+   }
+
+   if (iVariant)
+      code[0] |= 1 << 7;
+   code[1] |= dir << 17;
+}
+
+// Encodes the rounding direction for the short conversion form as a 2-bit
+// field at bit 16 of code[0]. The plain and *I variants of a mode share one
+// encoding; every other mode leaves the field at 0.
+void
+CodeEmitterNVC0::roundMode_CS(const Instruction *i)
+{
+   uint32_t dir = 0;
+
+   switch (i->rnd) {
+   case ROUND_M:
+   case ROUND_MI:
+      dir = 1;
+      break;
+   case ROUND_P:
+   case ROUND_PI:
+      dir = 2;
+      break;
+   case ROUND_Z:
+   case ROUND_ZI:
+      dir = 3;
+      break;
+   default:
+      break;
+   }
+
+   code[0] |= dir << 16;
+}
+
+// Encodes conversions and the ops expressed through them (CEIL, FLOOR,
+// TRUNC, SAT, ABS, NEG). For CEIL / FLOOR / TRUNC the instruction's rnd
+// field is overwritten with the matching rounding mode (the *I variant for
+// float to float), which is why the instruction is not const.
+void
+CodeEmitterNVC0::emitCVT(Instruction *i)
+{
+   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
+
+   if (i->op == OP_CEIL)
+      i->rnd = f2f ? ROUND_PI : ROUND_P;
+   else if (i->op == OP_FLOOR)
+      i->rnd = f2f ? ROUND_MI : ROUND_M;
+   else if (i->op == OP_TRUNC)
+      i->rnd = f2f ? ROUND_ZI : ROUND_Z;
+
+   // the op itself may imply a modifier
+   const bool sat = (i->op == OP_SAT) || i->saturate;
+   const bool abs = (i->op == OP_ABS) || i->src[0].mod.abs();
+   const bool neg = (i->op == OP_NEG) || i->src[0].mod.neg();
+
+   if (i->encSize != 8) {
+      // short form: pick the base opcode from the op and the types
+      if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
+         code[0] = 0x298;
+      } else if (isFloatType(i->dType)) {
+         if (isFloatType(i->sType))
+            code[0] = 0x098;
+         else
+            code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
+      } else {
+         assert(isFloatType(i->sType));
+
+         code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
+      }
+
+      if (neg)
+         code[0] |= 1 << 16;
+      if (sat)
+         code[0] |= 1 << 18;
+      if (abs)
+         code[0] |= 1 << 19;
+
+      roundMode_CS(i);
+      return;
+   }
+
+   emitForm_B(i, HEX64(10000000, 00000004));
+
+   roundMode_C(i);
+
+   // log2 of the operand sizes
+   code[0] |= util_logbase2(i->def[0].getSize()) << 20;
+   code[0] |= util_logbase2(i->src[0].getSize()) << 23;
+
+   if (sat)
+      code[0] |= 0x20;
+   if (abs)
+      code[0] |= 1 << 6;
+   if (neg && i->op != OP_ABS)
+      code[0] |= 1 << 8;
+
+   if (i->ftz)
+      code[1] |= 1 << 23;
+
+   if (isSignedIntType(i->dType))
+      code[0] |= 0x080;
+   if (isSignedIntType(i->sType))
+      code[0] |= 0x200;
+
+   // conversion class: float<-float sets nothing, float<-int 0x08000000,
+   // int<-float 0x04000000, int<-int 0x0c000000
+   const bool dstFloat = isFloatType(i->dType);
+   const bool srcFloat = isFloatType(i->sType);
+
+   if (dstFloat) {
+      if (!srcFloat)
+         code[1] |= 0x08000000;
+   } else if (srcFloat) {
+      code[1] |= 0x04000000;
+   } else {
+      code[1] |= 0x0c000000;
+   }
+}
+
+// Encodes a comparison (SET and its AND / OR / XOR combining variants).
+// The low opcode bits are chosen from the source type, the upper word from
+// the op. When the result goes to a predicate the opcode is adjusted, the
+// destination field at bits 14..19 is cleared and re-used for two predicate
+// ids (def 0 at bit 17, def 1 at bit 14, or 7 when there is no def 1).
+void
+CodeEmitterNVC0::emitSET(const CmpInstruction *i)
+{
+   uint32_t lo;
+   if (i->sType == TYPE_F64)
+      lo = 0x1;
+   else if (!isFloatType(i->sType))
+      lo = 0x3;
+   else
+      lo = 0;
+
+   if (isFloatType(i->dType) || isSignedIntType(i->sType))
+      lo |= 0x20;
+
+   uint32_t hi;
+   switch (i->op) {
+   case OP_SET_AND:
+      hi = 0x10000000;
+      break;
+   case OP_SET_OR:
+      hi = 0x10200000;
+      break;
+   case OP_SET_XOR:
+      hi = 0x10400000;
+      break;
+   default:
+      hi = 0x100e0000;
+      break;
+   }
+
+   emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
+
+   if (i->def[0].getFile() == FILE_PREDICATE) {
+      code[1] += (i->sType == TYPE_F32) ? 0x10000000 : 0x08000000;
+
+      code[0] &= ~0xfc000;
+      defId(i->def[0], 17);
+
+      if (i->defExists(1))
+         defId(i->def[1], 14);
+      else
+         code[0] |= 0x1c000;
+   }
+
+   if (i->ftz)
+      code[1] |= 1 << 27;
+
+   emitCondCode(i->setCond, 32 + 23);
+   emitNegAbs12(i);
+}
+
+// Encodes a select-on-compare. The opcode depends on the destination type
+// (S32, U32 or F32; anything else asserts and uses opcode 0). A negated
+// third source is folded into the comparison by reversing the condition
+// code, which is stored at bit 23 of code[1].
+void
+CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
+{
+   uint64_t opc = 0;
+
+   if (i->dType == TYPE_S32)
+      opc = HEX64(30000000, 00000023);
+   else if (i->dType == TYPE_U32)
+      opc = HEX64(30000000, 00000003);
+   else if (i->dType == TYPE_F32)
+      opc = HEX64(38000000, 00000000);
+   else
+      assert(!"invalid type for SLCT");
+
+   emitForm_A(i, opc);
+
+   CondCode cond = i->setCond;
+   if (i->src[2].mod.neg())
+      cond = reverseCondCode(cond);
+
+   emitCondCode(cond, 32 + 23);
+
+   if (i->ftz)
+      code[0] |= 1 << 5;
+}
+
+// Encodes a select-by-predicate as a long form A. Bit 20 of code[1] is set
+// when the instruction's cc is CC_NOT_P or the third source carries a NOT
+// modifier.
+void CodeEmitterNVC0::emitSELP(const Instruction *i)
+{
+   emitForm_A(i, HEX64(20000000, 00000004));
+
+   if ((i->cc == CC_NOT_P) ||
+       (i->src[2].mod & Modifier(NV50_IR_MOD_NOT))) {
+      code[1] |= 1 << 20;
+   }
+}
+
+// Encodes a TEXCSAA instruction: fixed opcode words, tex.r in the low bits
+// of code[1] and tex.s at bit 8, the liveOnly flag at bit 9 of code[0],
+// then destination 0 and source 0. No predicate is written here.
+void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
+{
+   code[0] = 0x00000086;
+   if (i->tex.liveOnly)
+      code[0] |= 1 << 9;
+
+   code[1] = 0xd0000000;
+   code[1] |= i->tex.r;
+   code[1] |= i->tex.s << 8;
+
+   defId(i->def[0], 14);
+   srcId(i->src[0], 20);
+}
+
+// Encodes the texture fetch family (TEX, TXB, TXL, TXF, TXG, TXD). The op
+// selects the upper opcode word. The rest of code[1] carries the component
+// mask (bit 14), tex.r / tex.s, and the target description (dimension,
+// cube, array, shadow, multisample, offsets). The source following the
+// target's arguments is written at bit 26.
+void
+CodeEmitterNVC0::emitTEX(const TexInstruction *i)
+{
+   code[0] = 0x00000006;
+   code[0] |= 0x80; // normal/t/p mode = t, XXX: what is this ?
+
+   if (i->tex.liveOnly)
+      code[0] |= 1 << 9;
+
+   switch (i->op) {
+   case OP_TEX:
+      code[1] = 0x80000000;
+      break;
+   case OP_TXB:
+      code[1] = 0x84000000;
+      break;
+   case OP_TXL:
+      code[1] = 0x86000000;
+      break;
+   case OP_TXF:
+      code[1] = 0x92000000;
+      break;
+   case OP_TXG:
+      code[1] = 0xa0000000;
+      break;
+   case OP_TXD:
+      code[1] = 0xe0000000;
+      break;
+   default:
+      assert(!"invalid texture op");
+      break;
+   }
+
+   defId(i->def[0], 14);
+   srcId(i->src[0], 20);
+
+   emitPredicate(i);
+
+   if (i->op == OP_TXG)
+      code[0] |= i->tex.gatherComp << 5;
+
+   code[1] |= i->tex.mask << 14;
+
+   code[1] |= i->tex.r;
+   code[1] |= i->tex.s << 8;
+
+   const bool indirect =
+      i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0;
+   if (indirect)
+      code[1] |= 1 << 18; // in 1st source (with array index)
+
+   // texture target:
+   code[1] |= (i->tex.target.getDim() - 1) << 20;
+   if (i->tex.target.isCube())
+      code[1] += 2 << 20;
+   if (i->tex.target.isArray())
+      code[1] |= 1 << 19;
+   if (i->tex.target.isShadow())
+      code[1] |= 1 << 24;
+
+   const int src1 = i->tex.target.getArgCount();
+
+   // lzero: an immediate in that slot clears one opcode bit for TXL / TXF
+   if (i->src[src1].getFile() == FILE_IMMEDIATE) {
+      if (i->op == OP_TXL)
+         code[1] &= ~(1 << 26);
+      else if (i->op == OP_TXF)
+         code[1] &= ~(1 << 25);
+   }
+
+   if (i->tex.target == TEX_TARGET_2D_MS ||
+       i->tex.target == TEX_TARGET_2D_MS_ARRAY) {
+      code[1] |= 1 << 23;
+   }
+
+   if (i->tex.useOffsets) // in vecSrc0.w
+      code[1] |= 1 << 22;
+
+   srcId(i->src[src1], 26);
+}
+
+// Encodes a texture query. The query kind is a field at bit 22 of code[1]
+// (DIMS = 0 ... BORDER_COLOUR = 5); an unknown query asserts and leaves the
+// field at 0. Mask, tex.r / tex.s and the indirect flag follow the same
+// layout as in emitTEX.
+void
+CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
+{
+   code[0] = 0x00000086;
+   code[1] = 0xc0000000;
+
+   uint32_t query = 0;
+
+   switch (i->tex.query) {
+   case TXQ_DIMS:
+      query = 0;
+      break;
+   case TXQ_TYPE:
+      query = 1;
+      break;
+   case TXQ_SAMPLE_POSITION:
+      query = 2;
+      break;
+   case TXQ_FILTER:
+      query = 3;
+      break;
+   case TXQ_LOD:
+      query = 4;
+      break;
+   case TXQ_BORDER_COLOUR:
+      query = 5;
+      break;
+   default:
+      assert(!"invalid texture query");
+      break;
+   }
+   code[1] |= query << 22;
+
+   code[1] |= i->tex.mask << 14;
+
+   code[1] |= i->tex.r;
+   code[1] |= i->tex.s << 8;
+   if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
+      code[1] |= 1 << 18;
+
+   defId(i->def[0], 14);
+   srcId(i->src[0], 20);
+   srcId(i->src[1], 26);
+
+   emitPredicate(i);
+}
+
+// Encodes a quad operation: laneMask goes to bit 6 of code[0], qOp to the
+// low bits of code[1]. When the instruction has no second source, source 0
+// is used for both operand slots.
+void
+CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
+{
+   code[0] = 0x00000000 | (laneMask << 6);
+   code[1] = 0x48000000 | qOp;
+
+   defId(i->def[0], 14);
+   srcId(i->src[0], 20);
+
+   if (i->srcExists(1))
+      srcId(i->src[1], 26);
+   else
+      srcId(i->src[0], 26);
+
+   emitPredicate(i);
+}
+
+// Encodes control flow instructions. Each op selects the upper opcode word
+// and a mask that says which extra fields apply (bit 0: predicate, bit 1:
+// branch target). Targets are encoded relative to the end of this
+// instruction (codeSize + 8), low 6 bits in code[0] and the rest in
+// code[1]. Calls to builtins are instead resolved through two relocations.
+void
+CodeEmitterNVC0::emitFlow(const Instruction *i)
+{
+   const FlowInstruction *f = i->asFlow();
+
+   unsigned mask; // bit 0: predicate, bit 1: target
+
+   code[0] = 0x00000007;
+
+   switch (i->op) {
+   case OP_BRA:
+      code[1] = f->absolute ? 0x00000000 : 0x40000000;
+      if (i->src[0].getFile() == FILE_MEMORY_CONST ||
+          i->src[1].getFile() == FILE_MEMORY_CONST)
+         code[1] |= 0x4000;
+      mask = 3;
+      break;
+   case OP_CALL:
+      code[1] = f->absolute ? 0x10000000 : 0x50000000;
+      if (i->src[0].getFile() == FILE_MEMORY_CONST)
+         code[1] |= 0x4000;
+      mask = 2;
+      break;
+
+   // predicate only
+   case OP_EXIT:
+      code[1] = 0x80000000;
+      mask = 1;
+      break;
+   case OP_RET:
+      code[1] = 0x90000000;
+      mask = 1;
+      break;
+   case OP_DISCARD:
+      code[1] = 0x98000000;
+      mask = 1;
+      break;
+   case OP_BREAK:
+      code[1] = 0xa8000000;
+      mask = 1;
+      break;
+   case OP_CONT:
+      code[1] = 0xb0000000;
+      mask = 1;
+      break;
+
+   // target only
+   case OP_JOINAT:
+      code[1] = 0x60000000;
+      mask = 2;
+      break;
+   case OP_PREBREAK:
+      code[1] = 0x68000000;
+      mask = 2;
+      break;
+   case OP_PRECONT:
+      code[1] = 0x70000000;
+      mask = 2;
+      break;
+   case OP_PRERET:
+      code[1] = 0x78000000;
+      mask = 2;
+      break;
+
+   // neither
+   case OP_QUADON:
+      code[1] = 0xc0000000;
+      mask = 0;
+      break;
+   case OP_QUADPOP:
+      code[1] = 0xc8000000;
+      mask = 0;
+      break;
+   case OP_BRKPT:
+      code[1] = 0xd0000000;
+      mask = 0;
+      break;
+   default:
+      assert(!"invalid flow operation");
+      return;
+   }
+
+   const bool hasPredicate = (mask & 1) != 0;
+   const bool hasTarget = (mask & 2) != 0;
+
+   if (hasPredicate) {
+      emitPredicate(i);
+      if (i->flagsSrc < 0)
+         code[0] |= 0x1e0;
+   }
+
+   // everything below needs the flow-specific fields
+   if (!f)
+      return;
+
+   if (f->allWarp)
+      code[0] |= 1 << 15;
+   if (f->limit)
+      code[0] |= 1 << 16;
+
+   if (f->op == OP_CALL) {
+      if (f->builtin) {
+         assert(f->absolute);
+         uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
+         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
+         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
+      } else {
+         assert(!f->absolute);
+         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
+         code[0] |= (pcRel & 0x3f) << 26;
+         code[1] |= (pcRel >> 6) & 0x3ffff;
+      }
+      return;
+   }
+
+   if (hasTarget) {
+      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+      // currently we don't want absolute branches
+      assert(!f->absolute);
+      code[0] |= (pcRel & 0x3f) << 26;
+      code[1] |= (pcRel >> 6) & 0x3ffff;
+   }
+}
+
+// Encodes a PFETCH: the 32-bit value of source 0 is split across the two
+// words (low 6 bits at code[0] bit 26, the rest in code[1]) and source 1 is
+// written at bit 20.
+void
+CodeEmitterNVC0::emitPFETCH(const Instruction *i)
+{
+   const uint32_t prim = i->src[0].get()->reg.data.u32;
+
+   code[0] = 0x00000006 | ((prim & 0x3f) << 26);
+   code[1] = 0x00000000 | (prim >> 6);
+
+   emitPredicate(i);
+   defId(i->def[0], 14);
+   srcId(i->src[1], 20);
+}
+
+// Encodes a VFETCH: source 0's offset goes in the low bits of code[1], and
+// the number of destinations minus one at bit 5 of code[0]. Bit 8 marks a
+// per-patch access, bit 9 a read from FILE_SHADER_OUTPUT. The two indirect
+// registers of source 0 are written at bits 20 and 26.
+void
+CodeEmitterNVC0::emitVFETCH(const Instruction *i)
+{
+   code[0] = 0x00000006;
+   code[1] = 0x06000000 | i->src[0].get()->reg.data.offset;
+
+   if (i->perPatch)
+      code[0] |= 0x100;
+
+   // yes, TCPs can read from *outputs* of other threads
+   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+      code[0] |= 0x200;
+
+   emitPredicate(i);
+
+   code[0] |= (i->defCount(0xf) - 1) << 5;
+
+   defId(i->def[0], 14);
+   srcId(i->src[0].getIndirect(0), 20);
+   srcId(i->src[0].getIndirect(1), 26); // vertex address
+}
+
+// Encodes an EXPORT: size / 4 - 1 goes to bit 5 of code[0] and source 0's
+// offset to the low bits of code[1]. A 12-byte access is rejected and the
+// offset must be aligned to the access size. Source 1, the exported value,
+// must be a register.
+void
+CodeEmitterNVC0::emitEXPORT(const Instruction *i)
+{
+   const unsigned int size = typeSizeof(i->dType);
+
+   code[0] = 0x00000006 | ((size / 4 - 1) << 5);
+   code[1] = 0x0a000000 | i->src[0].get()->reg.data.offset;
+
+   assert(size != 12 && !(code[1] & (size - 1)));
+
+   if (i->perPatch)
+      code[0] |= 0x100;
+
+   emitPredicate(i);
+
+   assert(i->src[1].getFile() == FILE_GPR);
+
+   srcId(i->src[0].getIndirect(0), 20);
+   srcId(i->src[0].getIndirect(1), 32 + 17); // vertex base address
+   srcId(i->src[1], 26);
+}
+
+// Encodes the EMIT / RESTART family. Bit 5 of code[0] marks an emit, bit 6
+// a restart (OP_RESTART or the EMIT_RESTART sub-op). The vertex stream in
+// source 1 is either an immediate, stored at bit 26 with 0xc000 set in
+// code[1], or a register.
+void
+CodeEmitterNVC0::emitOUT(const Instruction *i)
+{
+   code[0] = 0x00000006;
+   code[1] = 0x1c000000;
+
+   emitPredicate(i);
+
+   defId(i->def[0], 14); // new secret address
+   srcId(i->src[0], 20); // old secret address, should be 0 initially
+
+   assert(i->src[0].getFile() == FILE_GPR);
+
+   if (i->op == OP_EMIT)
+      code[0] |= 1 << 5;
+
+   const bool restart =
+      i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART;
+   if (restart)
+      code[0] |= 1 << 6;
+
+   // vertex stream
+   if (i->src[1].getFile() != FILE_IMMEDIATE) {
+      srcId(i->src[1], 26);
+   } else {
+      code[1] |= 0xc000;
+      code[0] |= SDATA(i->src[1]).u32 << 26;
+   }
+}
+
+// Adds the interpolation mode bits to code[0]. The long form stores the
+// whole ipa field at bit 6. The short form can only flag the
+// NV50_IR_INTERP_SC mode (0x80) and is restricted to OP_PINTERP with sample
+// mode 0.
+void
+CodeEmitterNVC0::emitInterpMode(const Instruction *i)
+{
+   if (i->encSize == 8) {
+      code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
+      return;
+   }
+
+   if (i->getInterpMode() == NV50_IR_INTERP_SC)
+      code[0] |= 0x80;
+
+   assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
+}
+
+// Encodes an attribute interpolation (OP_PINTERP or the plain variant).
+//   long form:  attribute offset in the low 16 bits of code[1], saturate at
+//               bit 5, source 1 at bit 26 for OP_PINTERP (63 otherwise) and
+//               the indirect register of source 0 at bit 20
+//   short form: OP_PINTERP only; the offset is split over bits 8..9 and
+//               26.. of code[0], source 1 at bit 20
+// In both forms the interpolation mode, the predicate and destination 0
+// follow, and finally the offset register for NV50_IR_INTERP_OFFSET.
+void
+CodeEmitterNVC0::emitINTERP(const Instruction *i)
+{
+   const uint32_t base = i->getSrc(0)->reg.data.offset;
+
+   if (i->encSize == 8) {
+      code[0] = 0x00000000;
+      code[1] = 0xc0000000 | (base & 0xffff);
+
+      if (i->saturate)
+         code[0] |= 1 << 5;
+
+      if (i->op == OP_PINTERP)
+         srcId(i->src[1], 26);
+      else
+         code[0] |= 0x3f << 26;
+
+      srcId(i->src[0].getIndirect(0), 20);
+   } else {
+      assert(i->op == OP_PINTERP);
+      code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
+      srcId(i->src[1], 20);
+   }
+   emitInterpMode(i);
+
+   emitPredicate(i);
+   defId(i->def[0], 14);
+
+   // The offset register lives at bit 17 of code[1] (position 32 + 17),
+   // the same field that receives the "no register" value 63 below.
+   // Position 17 would land in code[0] on top of the destination id that
+   // was just written at bits 14..19.
+   // NOTE(review): this field is also written for the short encoding,
+   // where this function only initialises code[0] -- confirm that is
+   // intended.
+   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
+      srcId(i->src[i->op == OP_PINTERP ? 2 : 1], 32 + 17);
+   else
+      code[1] |= 0x3f << 17;
+}
+
+// ORs the access type of a load / store into code[0] (bits 5..7):
+// U8 0x00, S8 0x20, 16-bit unsigned / half 0x40, S16 0x60, 32-bit 0x80,
+// 64-bit 0xa0, B128 0xc0. An unknown type asserts and is encoded like a
+// 32-bit access.
+void
+CodeEmitterNVC0::emitLoadStoreType(DataType ty)
+{
+   uint8_t enc;
+
+   switch (ty) {
+   case TYPE_U8:
+      enc = 0x00;
+      break;
+   case TYPE_S8:
+      enc = 0x20;
+      break;
+   case TYPE_U16:
+   case TYPE_F16:
+      enc = 0x40;
+      break;
+   case TYPE_S16:
+      enc = 0x60;
+      break;
+   case TYPE_U32:
+   case TYPE_S32:
+   case TYPE_F32:
+      enc = 0x80;
+      break;
+   case TYPE_U64:
+   case TYPE_S64:
+   case TYPE_F64:
+      enc = 0xa0;
+      break;
+   case TYPE_B128:
+      enc = 0xc0;
+      break;
+   default:
+      enc = 0x80;
+      assert(!"invalid type");
+      break;
+   }
+
+   code[0] |= enc;
+}
+
+// ORs the cache mode of a load / store into code[0] (bits 8..9):
+// CA 0x000, CG 0x100, CS 0x200, CV 0x300. An unknown mode asserts and is
+// encoded as 0.
+void
+CodeEmitterNVC0::emitCachingMode(CacheMode c)
+{
+   uint32_t enc = 0;
+
+   switch (c) {
+   case CACHE_CA:
+// case CACHE_WB:
+      enc = 0x000;
+      break;
+   case CACHE_CG:
+      enc = 0x100;
+      break;
+   case CACHE_CS:
+      enc = 0x200;
+      break;
+   case CACHE_CV:
+// case CACHE_WT:
+      enc = 0x300;
+      break;
+   default:
+      assert(!"invalid caching mode");
+      break;
+   }
+
+   code[0] |= enc;
+}
+
+// Encodes a store. The memory file of source 0 (global, local or shared)
+// selects the upper opcode word; any other file asserts and uses 0. Source
+// 0 supplies the 16-bit address and the indirect register at bit 20, and
+// source 1, the value to store, is written at bit 14.
+void
+CodeEmitterNVC0::emitSTORE(const Instruction *i)
+{
+   uint32_t opc = 0;
+
+   switch (i->src[0].getFile()) {
+   case FILE_MEMORY_GLOBAL:
+      opc = 0x90000000;
+      break;
+   case FILE_MEMORY_LOCAL:
+      opc = 0xc8000000;
+      break;
+   case FILE_MEMORY_SHARED:
+      opc = 0xc9000000;
+      break;
+   default:
+      assert(!"invalid memory file");
+      break;
+   }
+
+   code[0] = 0x00000005;
+   code[1] = opc;
+
+   setAddress16(i->src[0]);
+   srcId(i->src[1], 14);
+   srcId(i->src[0].getIndirect(0), 20);
+
+   emitPredicate(i);
+
+   emitLoadStoreType(i->dType);
+   emitCachingMode(i->cache);
+}
+
+// Encodes a load. Global, local and shared memory select the upper opcode
+// word directly. A constant buffer load that is not indirect and is 4
+// bytes wide is handed to emitMOV instead; other constant buffer loads use
+// a different low opcode that also carries subOp at bit 8 and the file
+// index in code[1]. Any other file asserts and uses opcode 0.
+void
+CodeEmitterNVC0::emitLOAD(const Instruction *i)
+{
+   uint32_t opc = 0;
+
+   code[0] = 0x00000005;
+
+   switch (i->src[0].getFile()) {
+   case FILE_MEMORY_GLOBAL:
+      opc = 0x80000000;
+      break;
+   case FILE_MEMORY_LOCAL:
+      opc = 0xc0000000;
+      break;
+   case FILE_MEMORY_SHARED:
+      opc = 0xc1000000;
+      break;
+   case FILE_MEMORY_CONST:
+      if (!i->src[0].isIndirect(0) && typeSizeof(i->dType) == 4) {
+         emitMOV(i); // not sure if this is any better
+         return;
+      }
+      opc = 0x14000000 | (i->src[0].get()->reg.fileIndex << 10);
+      code[0] = 0x00000006 | (i->subOp << 8);
+      break;
+   default:
+      assert(!"invalid memory file");
+      break;
+   }
+
+   code[1] = opc;
+
+   defId(i->def[0], 14);
+
+   setAddress16(i->src[0]);
+   srcId(i->src[0].getIndirect(0), 20);
+
+   emitPredicate(i);
+
+   emitLoadStoreType(i->dType);
+   emitCachingMode(i->cache);
+}
+
+// Translate the system value referenced by 'ref' into the special register
+// id used by the sreg move encoding. Multi-component values (TID, CTAID,
+// NTID, NCTAID, CLOCK) add their component index to a base id.
+// Asserts and yields 0 for system values without a special register.
+uint8_t
+CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
+{
+   uint8_t id;
+
+   switch (SDATA(ref).sv.sv) {
+   case SV_LANEID:
+      id = 0x00;
+      break;
+   case SV_PHYSID:
+      id = 0x03;
+      break;
+   case SV_VERTEX_COUNT:
+      id = 0x10;
+      break;
+   case SV_INVOCATION_ID:
+      id = 0x11;
+      break;
+   case SV_YDIR:
+      id = 0x12;
+      break;
+   case SV_TID:
+      id = 0x21 + SDATA(ref).sv.index;
+      break;
+   case SV_CTAID:
+      id = 0x25 + SDATA(ref).sv.index;
+      break;
+   case SV_NTID:
+      id = 0x29 + SDATA(ref).sv.index;
+      break;
+   case SV_GRIDID:
+      id = 0x2c;
+      break;
+   case SV_NCTAID:
+      id = 0x2d + SDATA(ref).sv.index;
+      break;
+   case SV_LBASE:
+      id = 0x34;
+      break;
+   case SV_SBASE:
+      id = 0x30;
+      break;
+   case SV_CLOCK:
+      id = 0x50 + SDATA(ref).sv.index;
+      break;
+   default:
+      assert(!"no sreg for system value");
+      id = 0;
+      break;
+   }
+   return id;
+}
+
+// Emit a move (also used for OP_RDSV and for direct 4-byte constant loads).
+// Three cases are handled:
+//  - source is a system value: special register move (long or short form)
+//  - long (8 byte) encoding: opcode depends on the source file
+//  - short (4 byte) encoding: immediate or emitShortSrc2() source
+void
+CodeEmitterNVC0::emitMOV(const Instruction *i)
+{
+ if (i->src[0].getFile() == FILE_SYSTEM_VALUE) {
+ uint8_t sr = getSRegEncoding(i->src[0]);
+
+ // the special register id sits at bit 26 (long) or bit 20 (short)
+ if (i->encSize == 8) {
+ code[0] = 0x00000004 | (sr << 26);
+ code[1] = 0x2c000000;
+ } else {
+ code[0] = 0x40000008 | (sr << 20);
+ }
+ defId(i->def[0], 14);
+
+ emitPredicate(i);
+ } else
+ if (i->encSize == 8) {
+ uint64_t opc;
+
+ if (i->src[0].getFile() == FILE_IMMEDIATE)
+ opc = HEX64(18000000, 000001e2);
+ else
+ if (i->src[0].getFile() == FILE_PREDICATE)
+ opc = HEX64(080e0000, 1c000004);
+ else
+ opc = HEX64(28000000, 00000004);
+
+ // lane mask of the instruction goes into bits 5+
+ opc |= i->lanes << 5;
+
+ emitForm_B(i, opc);
+ } else {
+ uint32_t imm;
+
+ if (i->src[0].getFile() == FILE_IMMEDIATE) {
+ imm = SDATA(i->src[0]).u32;
+ if (imm & 0xfff00000) {
+ // upper 12 bits are used in place; the low 20 bits must be zero
+ assert(!(imm & 0x000fffff));
+ code[0] = 0x00000318 | imm;
+ } else {
+ // NOTE(review): this assert can never fire - in this branch the top
+ // 12 bits of imm are zero, so (int32_t)imm is non-negative and the
+ // second operand of || is always true. Values >= 0x1000 would lose
+ // bits in the shift below; confirm the intended range.
+ assert(imm < 0x800 || ((int32_t)imm >= -0x800));
+ code[0] = 0x00000118 | (imm << 20);
+ }
+ } else {
+ code[0] = 0x0028;
+ emitShortSrc2(i->src[0]);
+ }
+ defId(i->def[0], 14);
+
+ emitPredicate(i);
+ }
+}
+
+// Encode a single instruction at the current output position.
+//
+// Dispatches on insn->op to the matching emit* routine, applies the join
+// flag and then advances the output pointer and size by insn->encSize.
+//
+// Returns false (without advancing) if the instruction has no encoding size,
+// if it does not fit into the remaining output buffer, or if the operation
+// is one that should have been eliminated / lowered earlier or is unknown.
+bool
+CodeEmitterNVC0::emitInstruction(Instruction *insn)
+{
+   if (!insn->encSize) {
+      ERROR("skipping unencodable instruction: "); insn->print();
+      return false;
+   } else
+   if (codeSize + insn->encSize > codeSizeLimit) {
+      ERROR("code emitter output buffer too small\n");
+      return false;
+   }
+
+   // assert that instructions with multiple defs don't corrupt registers
+   for (int d = 0; insn->defExists(d); ++d)
+      assert(insn->asTex() || insn->def[d].rep()->reg.data.id >= 0);
+
+   switch (insn->op) {
+   case OP_MOV:
+   case OP_RDSV:
+      emitMOV(insn);
+      break;
+   case OP_NOP:
+      break;
+   case OP_LOAD:
+      emitLOAD(insn);
+      break;
+   case OP_STORE:
+      emitSTORE(insn);
+      break;
+   case OP_LINTERP:
+   case OP_PINTERP:
+      emitINTERP(insn);
+      break;
+   case OP_VFETCH:
+      emitVFETCH(insn);
+      break;
+   case OP_EXPORT:
+      emitEXPORT(insn);
+      break;
+   case OP_PFETCH:
+      emitPFETCH(insn);
+      break;
+   case OP_EMIT:
+   case OP_RESTART:
+      emitOUT(insn);
+      break;
+   case OP_ADD:
+   case OP_SUB:
+      if (isFloatType(insn->dType))
+         emitFADD(insn);
+      else
+         emitUADD(insn);
+      break;
+   case OP_MUL:
+      if (isFloatType(insn->dType))
+         emitFMUL(insn);
+      else
+         emitUMUL(insn);
+      break;
+   case OP_MAD:
+   case OP_FMA:
+      if (isFloatType(insn->dType))
+         emitFMAD(insn);
+      else
+         emitIMAD(insn);
+      break;
+   case OP_NOT:
+      emitNOT(insn);
+      break;
+   case OP_AND:
+      emitLogicOp(insn, 0);
+      break;
+   case OP_OR:
+      emitLogicOp(insn, 1);
+      break;
+   case OP_XOR:
+      emitLogicOp(insn, 2);
+      break;
+   case OP_SHL:
+   case OP_SHR:
+      emitShift(insn);
+      break;
+   case OP_SET:
+   case OP_SET_AND:
+   case OP_SET_OR:
+   case OP_SET_XOR:
+      emitSET(insn->asCmp());
+      break;
+   case OP_SELP:
+      emitSELP(insn);
+      break;
+   case OP_SLCT:
+      emitSLCT(insn->asCmp());
+      break;
+   case OP_MIN:
+   case OP_MAX:
+      emitMINMAX(insn);
+      break;
+   case OP_ABS:
+   case OP_NEG:
+   case OP_CEIL:
+   case OP_FLOOR:
+   case OP_TRUNC:
+   case OP_CVT:
+   case OP_SAT:
+      emitCVT(insn);
+      break;
+   // special function ops: the second argument selects the function
+   case OP_RSQ:
+      emitSFnOp(insn, 5);
+      break;
+   case OP_RCP:
+      emitSFnOp(insn, 4);
+      break;
+   case OP_LG2:
+      emitSFnOp(insn, 3);
+      break;
+   case OP_EX2:
+      emitSFnOp(insn, 2);
+      break;
+   case OP_SIN:
+      emitSFnOp(insn, 1);
+      break;
+   case OP_COS:
+      emitSFnOp(insn, 0);
+      break;
+   case OP_PRESIN:
+   case OP_PREEX2:
+      emitPreOp(insn);
+      break;
+   case OP_TEX:
+   case OP_TXB:
+   case OP_TXL:
+   case OP_TXD:
+   case OP_TXF:
+      emitTEX(insn->asTex());
+      break;
+   case OP_TXQ:
+      emitTXQ(insn->asTex());
+      break;
+   case OP_BRA:
+   case OP_CALL:
+   case OP_PRERET:
+   case OP_RET:
+   case OP_DISCARD:
+   case OP_EXIT:
+   case OP_PRECONT:
+   case OP_CONT:
+   case OP_PREBREAK:
+   case OP_BREAK:
+   case OP_JOINAT:
+   case OP_BRKPT:
+   case OP_QUADON:
+   case OP_QUADPOP:
+      emitFlow(insn);
+      break;
+   case OP_QUADOP:
+      emitQUADOP(insn, insn->subOp, insn->lanes);
+      break;
+   // derivatives are quad operations; a negated source flips the op pattern
+   case OP_DFDX:
+      emitQUADOP(insn, insn->src[0].mod.neg() ? 0x66 : 0x99, 0x4);
+      break;
+   case OP_DFDY:
+      emitQUADOP(insn, insn->src[0].mod.neg() ? 0x5a : 0xa5, 0x5);
+      break;
+   case OP_POPCNT:
+      emitPOPC(insn);
+      break;
+   case OP_JOIN:
+      // a JOIN is a NOP carrying the join flag, which is applied below
+      emitNOP(insn);
+      insn->join = 1;
+      break;
+   case OP_PHI:
+   case OP_UNION:
+   case OP_CONSTRAINT:
+      ERROR("operation should have been eliminated\n");
+      return false;
+   case OP_EXP:
+   case OP_LOG:
+   case OP_SQRT:
+   case OP_POW:
+      ERROR("operation should have been lowered\n");
+      return false;
+   default:
+      ERROR("unknown op\n");
+      return false;
+   }
+
+   if (insn->join) {
+      code[0] |= 0x10;
+      assert(insn->encSize == 8);
+   }
+
+   // code points at 32-bit words, encSize is in bytes
+   code += insn->encSize / 4;
+   codeSize += insn->encSize;
+   return true;
+}
+
+// Return the smallest encoding size in bytes (4 or 8) usable for 'i'.
+//
+// NOTE: the first condition contains "|| 1", so this function currently
+// always returns 8 and everything after it is unreachable. The remaining
+// checks list the conditions under which the short form would be rejected
+// if it were enabled.
+uint32_t
+CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
+{
+ const Target::OpInfo &info = targ->getOpInfo(i);
+
+ if (info.minEncSize == 8 || 1)
+ return 8;
+
+ if (i->ftz || i->saturate || i->join)
+ return 8;
+ if (i->rnd != ROUND_N)
+ return 8;
+ if (i->predSrc >= 0 && i->op == OP_MAD)
+ return 8;
+
+ if (i->op == OP_PINTERP) {
+ if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
+ return 8;
+ } else
+ if (i->op == OP_MOV && i->lanes != 0xf) {
+ return 8;
+ }
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ if (i->src[s].isIndirect(0))
+ return 8;
+
+ if (i->src[s].getFile() == FILE_MEMORY_CONST) {
+ // only small offsets and constant buffers 0, 1 and 16 pass
+ if (SDATA(i->src[s]).offset >= 0x100)
+ return 8;
+ if (i->getSrc(s)->reg.fileIndex > 1 &&
+ i->getSrc(s)->reg.fileIndex != 16)
+ return 8;
+ } else
+ if (i->src[s].getFile() == FILE_IMMEDIATE) {
+ // NOTE(review): both branches test the same bound (>= 0x100 and > 0xff
+ // are equivalent for u32); confirm whether F32 was meant to differ.
+ if (i->dType == TYPE_F32) {
+ if (SDATA(i->src[s]).u32 >= 0x100)
+ return 8;
+ } else {
+ if (SDATA(i->src[s]).u32 > 0xff)
+ return 8;
+ }
+ }
+
+ if (i->op == OP_CVT)
+ continue;
+ // source modifiers: only ABS on RSQ and NEG on source 0 of ADD pass
+ if (i->src[s].mod != Modifier(0)) {
+ if (i->src[s].mod == Modifier(NV50_IR_MOD_ABS))
+ if (i->op != OP_RSQ)
+ return 8;
+ if (i->src[s].mod == Modifier(NV50_IR_MOD_NEG))
+ if (i->op != OP_ADD || s != 0)
+ return 8;
+ }
+ }
+
+ return 4;
+}
+
+// Construct an emitter for 'target' with no output buffer and no relocation
+// info set yet. The program type is set separately via setProgramType().
+CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) : targ(target)
+{
+   relocInfo = NULL;
+
+   code = NULL;
+   codeSizeLimit = 0;
+   codeSize = 0;
+}
+
+// Allocate a new NVC0 code emitter for programs of the given type.
+// The emitter is created with 'new' and handed to the caller.
+CodeEmitter *
+TargetNVC0::getCodeEmitter(Program::Type type)
+{
+   CodeEmitterNVC0 *const emitter = new CodeEmitterNVC0(this);
+
+   emitter->setProgramType(type);
+
+   return emitter;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
new file mode 100644
index 00000000000..de73efcc56a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -0,0 +1,705 @@
+
+#include "nv50/codegen/nv50_ir.h"
+#include "nv50/codegen/nv50_ir_build_util.h"
+
+#include "nv50_ir_target_nvc0.h"
+
+namespace nv50_ir {
+
+// 2-bit operation codes used to build quad operation patterns.
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV2 3
+
+// Pack four QOP_* codes into one byte: q in bits 0-1, r in bits 2-3,
+// s in bits 4-5 and t in bits 6-7. Arguments are given without the QOP_
+// prefix, e.g. QUADOP(MOV2, ADD, MOV2, ADD).
+#define QUADOP(q, r, s, t) \
+ ((QOP_##q << 0) | (QOP_##r << 2) | \
+ (QOP_##s << 4) | (QOP_##t << 6))
+
+// Pass run at the SSA stage (see TargetNVC0::runLegalizePass) that replaces
+// operations without a native instruction by calls to builtin routines.
+class NVC0LegalizeSSA : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+ virtual bool visit(Function *);
+
+ // we want to insert calls to the builtin library only after optimization
+ void handleDIV(Instruction *); // integer division, modulus
+ void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+
+private:
+ BuildUtil bld; // helper used to create the replacement instructions
+};
+
+// Replace a 32-bit integer DIV or MOD by a call to the builtin division
+// routine. The operands are moved to $r0 (dividend) and $r1 (divisor); the
+// routine leaves the quotient in $r0 and the modulus in $r1, and the wanted
+// one is copied to the original destination. The registers and predicates
+// the routine overwrites are marked as clobbered.
+//
+// Instructions of any other type are left untouched. The type is checked
+// before anything is inserted so that no stray moves to $r0/$r1 are left
+// behind for unsupported types.
+void
+NVC0LegalizeSSA::handleDIV(Instruction *i)
+{
+   FlowInstruction *call;
+   int builtin;
+   Value *def[2];
+
+   switch (i->dType) {
+   case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
+   case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
+   default:
+      return;
+   }
+
+   bld.setPosition(i, false);
+   def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
+   def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);
+
+   call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
+   // DIV takes the result from $r0, MOD from $r1
+   bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
+   // GPR mask: everything in $r0..$r3 except the register holding the result
+   bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
+   // predicate mask: $p0..$p3 for the signed routine, $p0..$p1 for unsigned
+   bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);
+
+   call->fixed = 1;
+   call->absolute = call->builtin = 1;
+   call->target.builtin = builtin;
+   delete_Instruction(prog, i);
+}
+
+// Placeholder for lowering double precision RCP / RSQ; currently does
+// nothing, so such instructions pass through this pass unchanged.
+void
+NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
+{
+ // TODO
+}
+
+// Per-function setup: point the builder at the program owning 'fn'.
+// Always returns true.
+bool
+NVC0LegalizeSSA::visit(Function *fn)
+{
+ bld.setProgram(fn->getProgram());
+ return true;
+}
+
+// Walk all instructions of 'bb' and legalize those that are not of type F32:
+// DIV / MOD go to handleDIV, F64 RCP / RSQ to handleRCPRSQ.
+// Always returns true.
+bool
+NVC0LegalizeSSA::visit(BasicBlock *bb)
+{
+   Instruction *insn = bb->getEntry();
+
+   while (insn) {
+      // fetch the successor first, handleDIV may delete the instruction
+      Instruction *const following = insn->next;
+
+      if (insn->dType != TYPE_F32) {
+         if (insn->op == OP_DIV || insn->op == OP_MOD) {
+            handleDIV(insn);
+         } else
+         if (insn->op == OP_RCP || insn->op == OP_RSQ) {
+            if (insn->dType == TYPE_F64)
+               handleRCPRSQ(insn);
+         }
+      }
+      insn = following;
+   }
+   return true;
+}
+
+// Pass run after register allocation (see TargetNVC0::runLegalizePass):
+// removes no-ops, replaces zero immediates by a register, handles 64-bit
+// operations and rewrites some control flow instructions.
+class NVC0LegalizePostRA : public Pass
+{
+private:
+ virtual bool visit(Function *);
+ virtual bool visit(BasicBlock *);
+
+ void replaceZero(Instruction *);
+ void split64BitOp(Instruction *);
+ bool tryReplaceContWithBra(BasicBlock *);
+ void propagateJoin(BasicBlock *);
+
+ // GPR with id 63, created per function in visit(Function *) and used as
+ // the replacement for immediate zero sources
+ LValue *r63;
+};
+
+// Per-function setup: create the GPR value with register id 63 that the
+// pass substitutes for zero immediates. Always returns true.
+bool
+NVC0LegalizePostRA::visit(Function *fn)
+{
+   LValue *const reg = new_LValue(fn, FILE_GPR);
+
+   reg->reg.data.id = 63;
+   r63 = reg;
+
+   return true;
+}
+
+// Replace every immediate source of 'i' whose value is 0 by r63.
+void
+NVC0LegalizePostRA::replaceZero(Instruction *i)
+{
+   int s = 0;
+
+   while (i->srcExists(s)) {
+      ImmediateValue *const immSrc = i->getSrc(s)->asImm();
+
+      if (immSrc != NULL && immSrc->reg.data.u64 == 0)
+         i->setSrc(s, r63);
+      ++s;
+   }
+}
+
+// Handle an instruction with an 8-byte result type (the caller checks the
+// size). Only TYPE_F64 is acted on: MAD becomes FMA, the operations listed
+// below are left as they are, and anything else is retyped to U32 and
+// duplicated so that two 32-bit instructions exist.
+// NOTE(review): the clone is inserted unmodified - nothing here makes the
+// two copies operate on different halves of the value; confirm whether that
+// is handled elsewhere or still missing.
+void
+NVC0LegalizePostRA::split64BitOp(Instruction *i)
+{
+ if (i->dType == TYPE_F64) {
+ if (i->op == OP_MAD)
+ i->op = OP_FMA;
+ if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
+ i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
+ i->op == OP_SET)
+ return;
+ i->dType = i->sType = TYPE_U32;
+
+ i->bb->insertAfter(i, i->clone(true)); // deep cloning
+ }
+}
+
+// replace CONT with BRA for single unconditional continue
+//
+// Applies only if 'bb' starts with PRECONT and has exactly two incoming
+// edges, one of them a back edge whose source block ends in an unpredicated
+// CONT. In that case the CONT becomes a BRA and the PRECONT is removed.
+// Returns true if the replacement was done, false otherwise.
+bool
+NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
+{
+ if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
+ return false;
+ // find the back edge among the two incoming edges
+ Graph::EdgeIterator ei = bb->cfg.incident();
+ if (ei.getType() != Graph::Edge::BACK)
+ ei.next();
+ if (ei.getType() != Graph::Edge::BACK)
+ return false;
+ BasicBlock *contBB = BasicBlock::get(ei.getNode());
+
+ if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
+ contBB->getExit()->getPredicate())
+ return false;
+ contBB->getExit()->op = OP_BRA;
+ bb->remove(bb->getEntry()); // delete PRECONT
+
+ // sanity check: there must not be a second back edge
+ ei.next();
+ assert(ei.end() || ei.getType() != Graph::Edge::BACK);
+ return true;
+}
+
+// replace branches to join blocks with join ops
+//
+// If 'bb' starts with a JOIN that is not marked with 'limit', each
+// predecessor block ending in BRA gets that BRA turned into a JOIN, and a
+// predecessor without any exit instruction gets a JOIN appended. The JOIN at
+// the start of 'bb' is then removed.
+void
+NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
+{
+ if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
+ return;
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ BasicBlock *in = BasicBlock::get(ei.getNode());
+ Instruction *exit = in->getExit();
+ if (!exit) {
+ in->insertTail(new FlowInstruction(func, OP_JOIN, bb));
+ // there should always be a terminator instruction
+ WARN("inserted missing terminator in BB:%i\n", in->getId());
+ } else
+ if (exit->op == OP_BRA) {
+ exit->op = OP_JOIN;
+ exit->asFlow()->limit = 1; // must-not-propagate marker
+ }
+ }
+ bb->remove(bb->getEntry());
+}
+
+// Legalize all instructions of 'bb', then try the control flow rewrites
+// (CONT -> BRA, or otherwise JOIN propagation) on the block.
+// Always returns true.
+bool
+NVC0LegalizePostRA::visit(BasicBlock *bb)
+{
+ Instruction *i, *next;
+
+ // remove pseudo operations and non-fixed no-ops, split 64 bit operations
+ for (i = bb->getFirst(); i; i = next) {
+ next = i->next; // saved up front because 'i' may be removed below
+ if (i->op == OP_EMIT || i->op == OP_RESTART) {
+ // drop the destination if nothing reads it
+ if (!i->getDef(0)->refCount())
+ i->setDef(0, NULL);
+ if (i->src[0].getFile() == FILE_IMMEDIATE)
+ i->setSrc(0, r63); // initial value must be 0
+ } else
+ if (i->isNop()) {
+ bb->remove(i);
+ } else {
+ // MOV and PFETCH keep their immediate zero sources
+ if (i->op != OP_MOV && i->op != OP_PFETCH)
+ replaceZero(i);
+ if (typeSizeof(i->dType) == 8)
+ split64BitOp(i);
+ }
+ }
+ // the block may be empty now
+ if (!bb->getEntry())
+ return true;
+
+ if (!tryReplaceContWithBra(bb))
+ propagateJoin(bb);
+
+ return true;
+}
+
+// Pass run before SSA construction (see TargetNVC0::runLegalizePass) that
+// rewrites operations into forms the NVC0 backend can handle: texture
+// argument setup, system value access, shader outputs, and expansion of
+// DIV / MOD / SQRT / POW into simpler operations.
+class NVC0LoweringPass : public Pass
+{
+public:
+ NVC0LoweringPass(Program *);
+
+private:
+ virtual bool visit(Function *);
+ virtual bool visit(BasicBlock *);
+ virtual bool visit(Instruction *);
+
+ bool handleRDSV(Instruction *);
+ bool handleWRSV(Instruction *);
+ bool handleEXPORT(Instruction *);
+ bool handleOUT(Instruction *);
+ bool handleDIV(Instruction *);
+ bool handleMOD(Instruction *);
+ bool handleSQRT(Instruction *);
+ bool handlePOW(Instruction *);
+ bool handleTEX(TexInstruction *);
+ bool handleTXD(TexInstruction *);
+ bool handleManualTXD(TexInstruction *);
+
+ void checkPredicate(Instruction *);
+
+ void readTessCoord(LValue *dst, int c);
+
+private:
+ const Target *const targ;
+
+ BuildUtil bld; // helper used to create new instructions and values
+
+ // set up in visit(Function *) for geometry programs only; used as the
+ // indirect address of exports and as operand of EMIT / RESTART
+ LValue *gpEmitAddress;
+};
+
+// Create the lowering pass for 'prog'.
+// gpEmitAddress starts out as NULL; it is only set up for geometry programs
+// in visit(Function *), and handleOUT asserts that it is set, so it must not
+// be left uninitialized.
+NVC0LoweringPass::NVC0LoweringPass(Program *prog)
+   : targ(prog->getTarget()), gpEmitAddress(NULL)
+{
+   bld.setProgram(prog);
+}
+
+// Per-function setup. For geometry programs, a value holding 0 is loaded at
+// the start of the function's root block and kept in gpEmitAddress.
+// Always returns true.
+bool
+NVC0LoweringPass::visit(Function *fn)
+{
+ if (prog->getType() == Program::TYPE_GEOMETRY) {
+ // only the main function is expected here for now (see TODO)
+ assert(!strncmp(fn->getName(), "MAIN", 4));
+ // TODO: when we generate actual functions pass this value along somehow
+ bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
+ gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
+ }
+ return true;
+}
+
+// Nothing to do per basic block; always returns true.
+bool
+NVC0LoweringPass::visit(BasicBlock *bb)
+{
+ return true;
+}
+
+// move array source to first slot, convert to u16, add indirections
+//
+// If the texture is an array or uses indirect texture / sampler indices, a
+// new first source is built that combines the array index (converted from
+// F32 to U16) with the indirect indices (inserted with INSBF), and the
+// coordinates are shifted up by one slot. If texel offsets are used, they
+// are packed into one immediate and appended as the last source.
+// Always returns true.
+bool
+NVC0LoweringPass::handleTEX(TexInstruction *i)
+{
+ const int dim = i->tex.target.getDim();
+ const int arg = i->tex.target.getDim() + i->tex.target.isArray();
+
+ // generate and move the tsc/tic/array source to the front
+ if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
+ LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+
+ Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(dim) : NULL;
+ // shift coordinates up by one to free slot 0
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, arrayIndex);
+
+ Value *ticRel = i->getIndirectR();
+ Value *tscRel = i->getIndirectS();
+
+ if (arrayIndex)
+ bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, arrayIndex);
+ else
+ bld.loadImm(src, 0);
+
+ if (ticRel) {
+ i->setSrc(i->tex.rIndirectSrc, NULL);
+ bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
+ }
+ if (tscRel) {
+ i->setSrc(i->tex.sIndirectSrc, NULL);
+ bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
+ }
+
+ i->setSrc(0, src);
+ }
+
+ // offset is last source (lod 1st, dc 2nd)
+ if (i->tex.useOffsets) {
+ uint32_t value = 0;
+ int n, c;
+ int s = i->srcCount(0xff);
+ // 4 bits per component, 12 bits per offset vector
+ for (n = 0; n < i->tex.useOffsets; ++n)
+ for (c = 0; c < 3; ++c)
+ value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
+ i->setSrc(s, bld.loadImm(NULL, value));
+ }
+
+ return true;
+}
+
+// Lower a TXD (texture fetch with explicit derivatives) into four plain TEX
+// operations, one per lane of the quad.
+//
+// For each lane l, the coordinates of lane l are broadcast to all lanes and
+// dPdx / dPdy of lane l are added or subtracted according to qOps[l], so that
+// a regular TEX sees the requested derivatives. The result of each TEX is
+// kept only for lane l (mov with a single-lane mask), and the four partial
+// results are merged with UNION into the original destinations. The
+// original instruction is removed from its block. Always returns true.
+bool
+NVC0LoweringPass::handleManualTXD(TexInstruction *i)
+{
+ // per lane: { quad op pattern for the dPdx step, pattern for the dPdy step }
+ static const uint8_t qOps[4][2] =
+ {
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
+ { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
+ { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
+ };
+ Value *def[4][4]; // [result component][lane]
+ Value *crd[3];
+ Instruction *tex;
+ Value *zero = bld.loadImm(bld.getSSA(), 0);
+ int l, c;
+ const int dim = i->tex.target.getDim();
+
+ i->op = OP_TEX; // no need to clone dPdx/dPdy later
+
+ for (c = 0; c < dim; ++c)
+ crd[c] = bld.getScratch();
+
+ bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
+ for (l = 0; l < 4; ++l) {
+ // mov coordinates from lane l to all lanes
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
+ // add dPdx from lane l to lanes dx
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
+ // add dPdy from lane l to lanes dy
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // texture
+ bld.insert(tex = i->clone(true));
+ for (c = 0; c < dim; ++c)
+ tex->setSrc(c, crd[c]);
+ // save results
+ for (c = 0; i->defExists(c); ++c) {
+ Instruction *mov;
+ def[c][l] = bld.getSSA();
+ mov = bld.mkMov(def[c][l], tex->getDef(c));
+ mov->fixed = 1;
+ mov->lanes = 1 << l; // only lane l takes this result
+ }
+ }
+ bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
+
+ // merge the per-lane results into the original destinations
+ for (c = 0; i->defExists(c); ++c) {
+ Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
+ for (l = 0; l < 4; ++l)
+ u->setSrc(l, def[c][l]);
+ }
+
+ i->bb->remove(i);
+ return true;
+}
+
+// Lower a TXD. After the common texture setup (handleTEX), textures with
+// more than 2 dimensions or with shadow comparison go through
+// handleManualTXD; otherwise dPdx / dPdy are appended as regular sources,
+// interleaved per coordinate (dPdx[0], dPdy[0], dPdx[1], ...).
+bool
+NVC0LoweringPass::handleTXD(TexInstruction *txd)
+{
+ int dim = txd->tex.target.getDim();
+ int arg = txd->tex.target.getDim() + txd->tex.target.isArray();
+
+ handleTEX(txd);
+ // skip one more source if handleTEX left one at index 'arg'
+ if (txd->src[arg].exists())
+ ++arg;
+
+ if (dim > 2 || txd->tex.target.isShadow())
+ return handleManualTXD(txd);
+
+ // at most s/t/array, x, y, offset
+ assert(arg <= 4 && !txd->src[arg].exists());
+
+ for (int c = 0; c < dim; ++c) {
+ txd->src[arg + c * 2 + 0].set(txd->dPdx[c]);
+ txd->src[arg + c * 2 + 1].set(txd->dPdy[c]);
+ txd->dPdx[c] = NULL;
+ txd->dPdy[c] = NULL;
+ }
+ return true;
+}
+
+// Replace a system value write by an EXPORT to the shader output address the
+// target assigns to that system value. Returns false if the target reports
+// an address of 0x400 or above, true after the replacement.
+bool
+NVC0LoweringPass::handleWRSV(Instruction *i)
+{
+ Instruction *st;
+ Symbol *sym;
+ uint32_t addr;
+
+ // must replace, $sreg are not writeable
+ addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
+ if (addr >= 0x400)
+ return false;
+ sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
+
+ st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0),
+ i->getSrc(1));
+ st->perPatch = i->perPatch;
+
+ bld.getBB()->remove(i);
+ return true;
+}
+
+// Load component 'c' of the tessellation coordinate into 'dst'.
+//
+// Components 0 and 1 are fetched directly (offsets 0x2f0 and 0x2f4, indexed
+// by the lane id). Component 2 is not stored; it is computed from the other
+// two as 1.0 - (x + y).
+void
+NVC0LoweringPass::readTessCoord(LValue *dst, int c)
+{
+   Value *laneid = bld.getSSA();
+   Value *x, *y;
+
+   bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));
+
+   // decide which of the two stored components have to be fetched and where
+   if (c == 0) {
+      x = dst;
+      y = NULL;
+   } else
+   if (c == 1) {
+      x = NULL;
+      y = dst;
+   } else {
+      assert(c == 2);
+      x = bld.getSSA();
+      y = bld.getSSA();
+   }
+   if (x)
+      bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
+   // the second component must go to 'y'; writing it to 'x' left 'y'
+   // undefined and passed a NULL destination when only y was requested
+   if (y)
+      bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);
+
+   if (c == 2) {
+      bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
+      bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
+   }
+}
+
+// Lower a system value read. If the target reports an address of 0x400 or
+// above, the instruction is kept as it is (it becomes a move from a special
+// register). Otherwise it is replaced by a read from that address:
+//  - SV_POSITION: linear interpolation of the fragment input
+//  - SV_TESS_COORD: see readTessCoord
+//  - anything else: a fetch from the shader inputs
+// Always returns true.
+bool
+NVC0LoweringPass::handleRDSV(Instruction *i)
+{
+ Symbol *sym = i->getSrc(0)->asSym();
+ Value *vtx = NULL;
+ Instruction *ld;
+ uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
+
+ if (addr >= 0x400) // mov $sreg
+ return true;
+
+ switch (i->getSrc(0)->reg.data.sv.sv) {
+ case SV_POSITION:
+ assert(prog->getType() == Program::TYPE_FRAGMENT);
+ ld = new_Instruction(func, OP_LINTERP, TYPE_F32);
+ ld->setDef(0, i->getDef(0));
+ ld->setSrc(0, bld.mkSymbol(FILE_SHADER_INPUT, 0, TYPE_F32, addr));
+ ld->setInterpolate(NV50_IR_INTERP_LINEAR);
+ bld.getBB()->insertAfter(i, ld);
+ break;
+ case SV_TESS_COORD:
+ assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
+ readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
+ break;
+ default:
+ // in tessellation evaluation programs the fetch gets a PFETCH result as
+ // its last argument
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+ vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
+ ld = bld.mkFetch(i->getDef(0), i->dType,
+ FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
+ ld->perPatch = i->perPatch;
+ break;
+ }
+ // the RDSV itself is no longer needed
+ bld.getBB()->remove(i);
+ return true;
+}
+
+// Turn a floating point division a / b into a * rcp(b). Divisions of other
+// types are left unchanged. Always returns true.
+bool
+NVC0LoweringPass::handleDIV(Instruction *i)
+{
+   if (isFloatType(i->dType)) {
+      Instruction *const recip =
+         bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
+
+      i->op = OP_MUL;
+      i->setSrc(1, recip->getDef(0));
+   }
+   return true;
+}
+
+// Lower an F32 modulus: a mod b becomes a - b * trunc(a * rcp(b)).
+// Other types are left unchanged. Always returns true.
+bool
+NVC0LoweringPass::handleMOD(Instruction *i)
+{
+ if (i->dType != TYPE_F32)
+ return true;
+ LValue *value = bld.getScratch();
+ bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1)); // 1 / b
+ bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value); // a / b
+ bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
+ bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value); // b * trunc(a / b)
+ i->op = OP_SUB;
+ i->setSrc(1, value);
+ return true;
+}
+
+// Lower sqrt(x) to x * rsq(x). Always returns true.
+// NOTE(review): for x == 0 this multiplies 0 by rsq(0); confirm that the
+// result is acceptable (no special handling is done here).
+bool
+NVC0LoweringPass::handleSQRT(Instruction *i)
+{
+ Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
+ bld.getSSA(), i->getSrc(0));
+ i->op = OP_MUL;
+ i->setSrc(1, rsq->getDef(0));
+
+ return true;
+}
+
+// Lower pow(x, y) to ex2(y * lg2(x)). The product is passed through PREEX2
+// before the instruction itself becomes the EX2. Always returns true.
+bool
+NVC0LoweringPass::handlePOW(Instruction *i)
+{
+ LValue *val = bld.getScratch();
+
+ bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
+ // the multiply gets the 'dnz' flag set
+ bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
+ bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
+
+ i->op = OP_EX2;
+ i->setSrc(0, val);
+ i->setSrc(1, NULL);
+
+ return true;
+}
+
+// Lower an EXPORT.
+//  - fragment programs: the export becomes a MOV of the exported value into
+//    a GPR whose id is the output offset / 4; prog->maxGPR is raised to
+//    cover it. Indirectly addressed outputs are not supported (returns
+//    false).
+//  - geometry programs: gpEmitAddress is attached as an indirect address.
+// Returns true in all other cases.
+bool
+NVC0LoweringPass::handleEXPORT(Instruction *i)
+{
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ int id = i->getSrc(0)->reg.data.offset / 4;
+
+ if (i->src[0].isIndirect(0)) // TODO, ugly
+ return false;
+ i->op = OP_MOV;
+ // the value to export moves from source 1 to source 0
+ i->src[0].set(i->src[1]);
+ i->setSrc(1, NULL);
+ i->setDef(0, new_LValue(func, FILE_GPR));
+ i->getDef(0)->reg.data.id = id;
+
+ prog->maxGPR = MAX2(prog->maxGPR, id);
+ } else
+ if (prog->getType() == Program::TYPE_GEOMETRY) {
+ i->setIndirect(0, 1, gpEmitAddress);
+ }
+ return true;
+}
+
+// Lower EMIT / RESTART. A RESTART directly following an EMIT is folded into
+// that EMIT via its subOp and deleted. Otherwise gpEmitAddress becomes both
+// the destination and the first source of the instruction, and an existing
+// first source is moved to slot 1. Always returns true.
+bool
+NVC0LoweringPass::handleOUT(Instruction *i)
+{
+ if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
+ i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
+ delete_Instruction(prog, i);
+ } else {
+ assert(gpEmitAddress);
+ i->setDef(0, gpEmitAddress);
+ if (i->srcExists(0))
+ i->setSrc(1, i->getSrc(0));
+ i->setSrc(0, gpEmitAddress);
+ }
+ return true;
+}
+
+// Generate a binary predicate if an instruction is predicated by
+// e.g. an f32 value.
+//
+// If the predicate of 'insn' is not in the predicate register file, a new
+// predicate is computed by comparing the value against 0 (not equal) and
+// 'insn' is re-predicated on it, keeping its condition code.
+void
+NVC0LoweringPass::checkPredicate(Instruction *insn)
+{
+ Value *pred = insn->getPredicate();
+ Value *pdst;
+
+ if (!pred || pred->reg.file == FILE_PREDICATE)
+ return;
+ pdst = new_LValue(func, FILE_PREDICATE);
+
+ // CAUTION: don't use pdst->getInsn, the definition might not be unique,
+ // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
+
+ bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
+
+ insn->setPredicate(insn->cc, pdst);
+}
+
+//
+// - add quadop dance for texturing
+// - put FP outputs in GPRs
+// - convert instruction sequences
+//
+// Lower a single instruction. The builder is positioned next to 'i' first,
+// so that the handlers insert new instructions before it. Returns the
+// handler's result, or true for operations handled inline or not at all.
+bool
+NVC0LoweringPass::visit(Instruction *i)
+{
+ // insert after the predecessor if there is one, else before the successor,
+ // else at the end of the (otherwise empty) block
+ if (i->prev)
+ bld.setPosition(i->prev, true);
+ else
+ if (i->next)
+ bld.setPosition(i->next, false);
+ else
+ bld.setPosition(i->bb, true);
+
+ if (i->cc != CC_ALWAYS)
+ checkPredicate(i);
+
+ switch (i->op) {
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ case OP_TXF:
+ case OP_TXQ:
+ case OP_TXG:
+ return handleTEX(i->asTex());
+ case OP_TXD:
+ return handleTXD(i->asTex());
+ case OP_EX2:
+ // EX2 takes its input from a PREEX2; the destination is reused as the
+ // intermediate value
+ bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
+ i->setSrc(0, i->getDef(0));
+ break;
+ case OP_POW:
+ return handlePOW(i);
+ case OP_DIV:
+ return handleDIV(i);
+ case OP_MOD:
+ return handleMOD(i);
+ case OP_SQRT:
+ return handleSQRT(i);
+ case OP_EXPORT:
+ return handleEXPORT(i);
+ case OP_EMIT:
+ case OP_RESTART:
+ return handleOUT(i);
+ case OP_RDSV:
+ return handleRDSV(i);
+ case OP_WRSV:
+ return handleWRSV(i);
+ case OP_LOAD:
+ // loads from shader inputs become VFETCH
+ if (i->src[0].getFile() == FILE_SHADER_INPUT) {
+ i->op = OP_VFETCH;
+ assert(prog->getType() != Program::TYPE_FRAGMENT);
+ }
+ break;
+ case OP_PINTERP:
+ // inputs in the offset range [0x280, 0x2c0) get the SC interpolation flag
+ if (i->getSrc(0)->reg.data.offset >= 0x280 &&
+ i->getSrc(0)->reg.data.offset < 0x2c0)
+ i->setInterpolate(i->getSampleMode() | NV50_IR_INTERP_SC);
+ break;
+ case OP_LINTERP:
+ // input at offset 0x3fc: convert the value to a float after the
+ // interpolation. Shifting bit 0 into the sign position and xor-ing with
+ // 0xbf800000 (-1.0f) yields +1.0f if bit 0 was set and -1.0f if not.
+ if (i->getSrc(0)->reg.data.offset == 0x3fc) {
+ Value *face = i->getDef(0);
+ bld.setPosition(i, true);
+ bld.mkOp2(OP_SHL, TYPE_U32, face, face, bld.mkImm(31));
+ bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
+ }
+ break;
+ default:
+ break;
+ }
+ return true;
+}
+
+// Run the legalization pass that belongs to the given code generation stage
+// on 'prog' and return its result. Stages without a pass yield false.
+bool
+TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
+{
+   switch (stage) {
+   case CG_STAGE_PRE_SSA: {
+      NVC0LoweringPass lowering(prog);
+      return lowering.run(prog, false, true);
+   }
+   case CG_STAGE_POST_RA: {
+      NVC0LegalizePostRA postRA;
+      return postRA.run(prog, false, true);
+   }
+   case CG_STAGE_SSA: {
+      NVC0LegalizeSSA ssa;
+      return ssa.run(prog, false, true);
+   }
+   default:
+      return false;
+   }
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
new file mode 100644
index 00000000000..60b2016878e
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -0,0 +1,568 @@
+
+#include "nv50_ir_target_nvc0.h"
+
+namespace nv50_ir {
+
+// Factory: allocate a new NVC0 target description for the given chipset.
+// The object is created with 'new' and returned to the caller.
+Target *getTargetNVC0(unsigned int chipset)
+{
+ return new TargetNVC0(chipset);
+}
+
+// Store the chipset id and fill in the per-operation information table.
+TargetNVC0::TargetNVC0(unsigned int card)
+{
+ chipset = card;
+ initOpInfo();
+}
+
+// BUILTINS / LIBRARY FUNCTIONS:
+
+// laziness -> will just hardcode everything for the time being
+
+// Will probably make this nicer once we support subroutines properly,
+// i.e. when we have an input IR that provides function declarations.
+
+static const uint32_t nvc0_builtin_code[] =
+{
+// DIV U32: slow unsigned integer division
+//
+// UNR recurrence (q = a / b):
+// look for z such that 2^32 - b <= b * z < 2^32
+// then q - 1 <= (a * z) / 2^32 <= q
+//
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p1
+// SIZE: 22 / 14 * 8 bytes
+//
+#if 1
+ 0x04009c03, 0x78000000,
+ 0x7c209cdd,
+ 0x0010dd18,
+ 0x08309c03, 0x60000000,
+ 0x05605c18,
+ 0x0810dc2a,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0000dde4, 0x28000000,
+ 0x08001c43, 0x50000000,
+ 0x05609c18,
+ 0x0010430d,
+ 0x0811dc03, 0x1b0e0000,
+ 0x08104103, 0x48000000,
+ 0x04000002, 0x08000000,
+ 0x0811c003, 0x1b0e0000,
+ 0x08104103, 0x48000000,
+ 0x040000ac,
+ 0x90001dff,
+#else
+ 0x0401dc03, 0x1b0e0000,
+ 0x00008003, 0x78000000,
+ 0x0400c003, 0x78000000,
+ 0x0c20c103, 0x48000000,
+ 0x0c108003, 0x60000000,
+ 0x00005c28,
+ 0x00001d18,
+ 0x0031c023, 0x1b0ec000,
+ 0xb000a1e7, 0x40000000,
+ 0x04000003, 0x6000c000,
+ 0x0813dc03, 0x1b000000,
+ 0x0420446c,
+ 0x040004bd,
+ 0x04208003, 0x5800c000,
+ 0x0430c103, 0x4800c000,
+ 0x0ffc5dff,
+ 0x90001dff,
+#endif
+
+// DIV S32: slow signed integer division
+//
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p3
+// SIZE: 18 * 8 bytes
+//
+ 0xfc05dc23, 0x188e0000,
+ 0xfc17dc23, 0x18c40000,
+ 0x03301e18,
+ 0x07305e18,
+ 0x0401dc03, 0x1b0e0000,
+ 0x00008003, 0x78000000,
+ 0x0400c003, 0x78000000,
+ 0x0c20c103, 0x48000000,
+ 0x0c108003, 0x60000000,
+ 0x00005c28,
+ 0x00001d18,
+ 0x0031c023, 0x1b0ec000,
+ 0xb000a1e7, 0x40000000,
+ 0x04000003, 0x6000c000,
+ 0x0813dc03, 0x1b000000,
+ 0x0420446c,
+ 0x040004bd,
+ 0x04208003, 0x5800c000,
+ 0x0430c103, 0x4800c000,
+ 0x0ffc5dff,
+ 0x01700e18,
+ 0x05704a18,
+ 0x90001dff,
+
+// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
+//
+// INPUT: $r0d (x)
+// OUTPUT: $r0d (rcp(x))
+// CLOBBER: $r2 - $r7
+// SIZE: 9 * 8 bytes
+//
+ 0x9810dc08,
+ 0x00009c28,
+ 0x4001df18,
+ 0x00019d18,
+ 0x08011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08011e01, 0x200c0000,
+ 0x10201c01, 0x50000000,
+ 0x00001de7, 0x90000000,
+
+// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
+//
+// INPUT: $r0d (x)
+// OUTPUT: $r0d (rsqrt(x))
+// CLOBBER: $r2 - $r7
+// SIZE: 14 * 8 bytes
+//
+ 0x9c10dc08,
+ 0x00009c28,
+ 0x00019d18,
+ 0x3fe1df18,
+ 0x18001c01, 0x50000000,
+ 0x0001dde2, 0x18ffe000,
+ 0x08211c01, 0x50000000,
+ 0x10011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08211c01, 0x50000000,
+ 0x10011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08211c01, 0x50000000,
+ 0x10011e01, 0x200c0000,
+ 0x10201c01, 0x50000000,
+ 0x00001de7, 0x90000000,
+};
+
+// Byte offset of each builtin routine inside nvc0_builtin_code, in the order
+// DIV U32, DIV S32, RCP F64, RSQ F64. Each entry is the sum of the sizes of
+// the preceding routines (22, 18 and 9 units of 8 bytes, as given in the
+// SIZE comments of the routines); keep both in sync.
+static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
+{
+ 0,
+ 8 * (22),
+ 8 * (22 + 18),
+ 8 * (22 + 18 + 9)
+};
+
+// Return the builtin routine library: '*code' receives a pointer to the
+// first word and '*size' the total size in bytes.
+void
+TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
+{
+ *code = &nvc0_builtin_code[0];
+ *size = sizeof(nvc0_builtin_code);
+}
+
+// Return the byte offset of builtin routine 'builtin' within the code
+// returned by getBuiltinCode(). 'builtin' must be a valid index in
+// [0, NVC0_BUILTIN_COUNT); the parameter is signed, so negative values are
+// rejected as well.
+uint32_t
+TargetNVC0::getBuiltinOffset(int builtin) const
+{
+   assert(builtin >= 0 && builtin < NVC0_BUILTIN_COUNT);
+   return nvc0_builtin_offsets[builtin];
+}
+
+// Compact description of what an operation supports; expanded into opInfo[]
+// by TargetNVC0::initOpInfo(). In the per-source masks, bit s refers to
+// source s.
+struct opProperties
+{
+ operation op;
+ unsigned int mNeg : 4; // sources that accept the NEG modifier
+ unsigned int mAbs : 4; // sources that accept the ABS modifier
+ unsigned int mNot : 4; // sources that accept the NOT modifier
+ unsigned int mSat : 4; // bit 3 set: destination supports saturation
+ unsigned int fConst : 3; // sources that may come from constant memory
+ unsigned int fImmd : 4; // last bit indicates if full immediate is supported
+};
+
+// Per-operation capabilities, see struct opProperties for the meaning of the
+// columns. Operations not listed here keep the defaults set in initOpInfo()
+// (no modifiers, GPR sources only). In the 'sat' column 0x8 means the
+// destination can saturate; in the 'imm' column "| 0x8" means a full 32-bit
+// immediate is supported.
+static const struct opProperties _initProps[] =
+{
+ // neg abs not sat c[] imm
+ { OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
+ { OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
+ { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
+ { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
+ { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
+ { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
+ { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
+ { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
+ { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
+ { OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
+ { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+ { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ // saturate only:
+ { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
+ { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
+};
+
+// Fill in the target description tables: the native register file mapping
+// and opInfo[] for every operation. All operations first get conservative
+// defaults (F32 types, GPR operands, no modifiers); the bit masks and tables
+// below then refine individual operations.
+void TargetNVC0::initOpInfo()
+{
+ unsigned int i, j;
+
+ // one bit per operation, indexed by the operation's enum value
+ static const uint32_t commutative[(OP_LAST + 31) / 32] =
+ {
+ // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
+ 0x0670ca00, 0x0000003f, 0x00000000
+ };
+
+ // NOTE(review): the first word is identical to the one in 'commutative'
+ // although the listed operations differ; confirm the mask matches the
+ // comment below.
+ static const uint32_t shortForm[(OP_LAST + 31) / 32] =
+ {
+ // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
+ 0x0670ca00, 0x00000000, 0x00000000
+ };
+
+ // operations that do not write a destination
+ static const operation noDest[] =
+ {
+ OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
+ OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
+ OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
+ OP_QUADON, OP_QUADPOP
+ };
+
+ joinAnterior = false;
+
+ // every file maps to itself, except that addresses live in GPRs
+ for (i = 0; i < DATA_FILE_COUNT; ++i)
+ nativeFileMap[i] = (DataFile)i;
+ nativeFileMap[FILE_ADDRESS] = FILE_GPR;
+
+ // defaults for all operations
+ for (i = 0; i < OP_LAST; ++i) {
+ opInfo[i].variants = NULL;
+ opInfo[i].op = (operation)i;
+ opInfo[i].srcTypes = 1 << (int)TYPE_F32;
+ opInfo[i].dstTypes = 1 << (int)TYPE_F32;
+ opInfo[i].immdBits = 0;
+ opInfo[i].srcNr = operationSrcNr[i];
+
+ for (j = 0; j < opInfo[i].srcNr; ++j) {
+ opInfo[i].srcMods[j] = 0;
+ opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
+ }
+ opInfo[i].dstMods = 0;
+ opInfo[i].dstFiles = 1 << (int)FILE_GPR;
+
+ opInfo[i].hasDest = 1;
+ opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
+ opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
+ // everything ordered before OP_MOV is a pseudo operation and cannot be
+ // predicated
+ opInfo[i].pseudo = (i < OP_MOV);
+ opInfo[i].predicate = !opInfo[i].pseudo;
+ opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
+ opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
+ }
+ for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
+ opInfo[noDest[i]].hasDest = 0;
+
+ // expand the compact property table into modifier and file masks
+ for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
+ const struct opProperties *prop = &_initProps[i];
+
+ for (int s = 0; s < 3; ++s) {
+ if (prop->mNeg & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
+ if (prop->mAbs & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
+ if (prop->mNot & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
+ if (prop->fConst & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
+ if (prop->fImmd & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
+ // bit 3 of fImmd is not a source index: it enables full 32-bit immediates
+ if (prop->fImmd & 8)
+ opInfo[prop->op].immdBits = 0xffffffff;
+ }
+ if (prop->mSat & 8)
+ opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
+ }
+}
+
+// Returns the size this target reports for the given register/memory file.
+// Files without storage of their own (NULL, ADDRESS, IMMEDIATE) report 0;
+// an unknown file trips an assertion and also reports 0.
+unsigned int
+TargetNVC0::getFileSize(DataFile file) const
+{
+   unsigned int size = 0;
+
+   switch (file) {
+   case FILE_NULL:
+   case FILE_ADDRESS:
+   case FILE_IMMEDIATE:
+      break;
+   case FILE_GPR:
+      size = 63;
+      break;
+   case FILE_PREDICATE:
+      size = 7;
+      break;
+   case FILE_FLAGS:
+      size = 1;
+      break;
+   case FILE_MEMORY_CONST:
+      size = 65536;
+      break;
+   case FILE_SHADER_INPUT:
+   case FILE_SHADER_OUTPUT:
+      size = 0x400;
+      break;
+   case FILE_MEMORY_GLOBAL:
+      size = 0xffffffff;
+      break;
+   case FILE_MEMORY_SHARED:
+      size = 16 << 10;
+      break;
+   case FILE_MEMORY_LOCAL:
+      size = 48 << 10;
+      break;
+   case FILE_SYSTEM_VALUE:
+      size = 32;
+      break;
+   default:
+      assert(!"invalid file");
+      break;
+   }
+   return size;
+}
+
+// Returns the unit value of a file: 2 for GPR, ADDRESS and SYSTEM_VALUE,
+// 0 for every other file.
+// NOTE(review): presumably log2 of the allocation granularity in bytes -
+// confirm against the callers.
+unsigned int
+TargetNVC0::getFileUnit(DataFile file) const
+{
+   switch (file) {
+   case FILE_GPR:
+   case FILE_ADDRESS:
+   case FILE_SYSTEM_VALUE:
+      return 2;
+   default:
+      return 0;
+   }
+}
+
+// Maps a system value symbol to its address.  Indexed values advance by 4
+// per index step; SV_PRIMITIVE_ID has a different address depending on
+// whether it is accessed as a shader input.  Unknown system values yield
+// 0xffffffff.
+uint32_t
+TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
+{
+   const int index = sym->reg.data.sv.index;
+   const int step = index * 4;
+
+   switch (sym->reg.data.sv.sv) {
+   case SV_TESS_FACTOR:
+      return 0x000 + step;
+   case SV_PRIMITIVE_ID:
+      if (shaderFile == FILE_SHADER_INPUT)
+         return 0x060;
+      return 0x040;
+   case SV_LAYER:
+      return 0x064;
+   case SV_VIEWPORT_INDEX:
+      return 0x068;
+   case SV_POINT_SIZE:
+      return 0x06c;
+   case SV_POSITION:
+      return 0x070 + step;
+   case SV_CLIP_DISTANCE:
+      return 0x2c0 + step;
+   case SV_POINT_COORD:
+      return 0x2e0 + step;
+   case SV_TESS_COORD:
+      return 0x2f0 + step;
+   case SV_INSTANCE_ID:
+      return 0x2f8;
+   case SV_VERTEX_ID:
+      return 0x2fc;
+   case SV_FACE:
+      return 0x3fc;
+   default:
+      break;
+   }
+   return 0xffffffff;
+}
+
+// Decides whether source slot s of instruction i may directly use the
+// operand that the load/move instruction ld reads (ld's source 0), instead
+// of going through ld's result.
+//
+// i  - instruction that would consume the operand
+// s  - index of the source slot of i under consideration
+// ld - instruction whose source 0 is the candidate operand
+//
+// Returns true if the operand can be used in slot s.
+bool
+TargetNVC0::insnCanLoad(const Instruction *i, int s,
+                        const Instruction *ld) const
+{
+   DataFile sf = ld->src[0].getFile();
+
+   // immediate 0 can be represented by GPR $r63
+   if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
+      return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
+
+   // Valid source slots are 0 .. srcNr - 1; anything else must be rejected
+   // before srcFiles[s] is consulted.
+   if (s < 0 || s >= opInfo[i->op].srcNr)
+      return false;
+   if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
+      return false;
+
+   // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
+   if (ld->src[0].isIndirect(0))
+      return false;
+
+   // All existing sources of i must be GPRs, predicates or a zero immediate;
+   // otherwise the operand is not folded in.
+   for (int k = 0; i->srcExists(k); ++k) {
+      if (i->src[k].getFile() == FILE_IMMEDIATE) {
+         if (i->getSrc(k)->reg.data.u64 != 0)
+            return false;
+      } else
+      if (i->src[k].getFile() != FILE_GPR &&
+          i->src[k].getFile() != FILE_PREDICATE) {
+         return false;
+      }
+   }
+
+   // not all instructions support full 32 bit immediates
+   if (sf == FILE_IMMEDIATE) {
+      const Storage &reg = ld->getSrc(0)->asImm()->reg;
+
+      if (opInfo[i->op].immdBits != 0xffffffff) {
+         if (i->sType == TYPE_F32) {
+            // the low 12 bits of the float must be clear
+            if (reg.data.u32 & 0xfff)
+               return false;
+         } else
+         if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
+            // with u32, 0xfffff counts as 0xffffffff as well
+            if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
+               return false;
+         }
+      } else
+      if (i->op == OP_MAD || i->op == OP_FMA) {
+         // requires src == dst, cannot decide before RA
+         // (except if we implement more constraints)
+         if (reg.data.u32 & 0xfff)
+            return false;
+      }
+   }
+
+   return true;
+}
+
+// Reports whether the operation is supported for the given data type:
+// MAD/FMA only for F32, SAD only for S32, and POW/SQRT/DIV/MOD never.
+// Everything else is reported as supported.
+bool
+TargetNVC0::isOpSupported(operation op, DataType ty) const
+{
+   switch (op) {
+   case OP_MAD:
+   case OP_FMA:
+      return ty == TYPE_F32;
+   case OP_SAD:
+      return ty == TYPE_S32;
+   case OP_POW:
+   case OP_SQRT:
+   case OP_DIV:
+   case OP_MOD:
+      return false;
+   default:
+      return true;
+   }
+}
+
+// Reports whether source s of insn may carry the modifier mod.
+//
+// insn - instruction the modifier would be applied to
+// s    - source slot index; only slots 0..2 have modifier information
+// mod  - modifier (combination) to test
+//
+// For non-float destination types only a fixed set of operations accepts
+// source modifiers at all, with extra restrictions on ADD and SUB.
+bool
+TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
+{
+   // initOpInfo() only fills srcMods[0..2], so slot 3 and above must never
+   // reach the table lookup below.
+   if (s >= 3)
+      return false;
+
+   if (!isFloatType(insn->dType)) {
+      switch (insn->op) {
+      case OP_ABS:
+      case OP_NEG:
+      case OP_CVT:
+      case OP_CEIL:
+      case OP_FLOOR:
+      case OP_TRUNC:
+      case OP_AND:
+      case OP_OR:
+      case OP_XOR:
+         break;
+      case OP_ADD:
+         // refuse if the other source is already negated
+         if (insn->src[s ? 0 : 1].mod.neg())
+            return false;
+         break;
+      case OP_SUB:
+         if (s == 0)
+            return insn->src[1].mod.neg() ? false : true;
+         break;
+      default:
+         return false;
+      }
+   }
+   return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
+}
+
+// An instruction may be predicated only if it has no predicate yet and its
+// opcode is flagged as predicable in opInfo.  The pred argument is unused.
+bool
+TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
+{
+   return !insn->getPredicate() && opInfo[insn->op].predicate;
+}
+
+// Reports whether the instruction's result may be saturated.  CVT always
+// can; other opcodes need the SAT destination modifier in opInfo and either
+// an F32 result, or a U32 result on ADD/MAD.
+bool
+TargetNVC0::isSatSupported(const Instruction *insn) const
+{
+   const operation op = insn->op;
+
+   if (op == OP_CVT)
+      return true;
+
+   const bool hasSatMod = (opInfo[op].dstMods & NV50_IR_MOD_SAT) != 0;
+   if (!hasSatMod)
+      return false;
+
+   switch (insn->dType) {
+   case TYPE_U32:
+      return op == OP_ADD || op == OP_MAD;
+   case TYPE_F32:
+      return true;
+   default:
+      return false;
+   }
+}
+
+// TODO: better values
+// Returns a latency estimate: 24 for anything that is not a load, 700 for
+// loads using cache mode CACHE_CV and 48 for all other loads.
+int TargetNVC0::getLatency(const Instruction *i) const
+{
+   if (i->op != OP_LOAD)
+      return 24;
+   return (i->cache == CACHE_CV) ? 700 : 48;
+}
+
+// These are "inverse" throughput values, i.e. the number of cycles required
+// to issue a specific instruction for a full warp (32 threads).
+//
+// Assuming we have more than 1 warp in flight, a higher issue latency results
+// in a lower result latency since the MP will have spent more time with other
+// warps.
+// This also helps to determine the number of cycles between instructions in
+// a single warp.
+//
+int TargetNVC0::getThroughput(const Instruction *i) const
+{
+   // TODO: better values
+   switch (i->dType) {
+   case TYPE_F32:
+      switch (i->op) {
+      case OP_ADD:
+      case OP_MUL:
+      case OP_MAD:
+      case OP_FMA:
+         return 1;
+      case OP_CVT:
+      case OP_CEIL:
+      case OP_FLOOR:
+      case OP_TRUNC:
+      case OP_SET:
+      case OP_SLCT:
+      case OP_MIN:
+      case OP_MAX:
+         return 2;
+      default:
+         // covers RCP, RSQ, LG2, SIN, COS, PRESIN, PREEX2 and the rest
+         return 8;
+      }
+   case TYPE_U32:
+   case TYPE_S32:
+      switch (i->op) {
+      case OP_ADD:
+      case OP_AND:
+      case OP_OR:
+      case OP_XOR:
+      case OP_NOT:
+         return 1;
+      default:
+         // covers MUL, MAD, CVT, SET, SLCT, SHL, SHR, NEG, ABS, MIN, MAX
+         // and the rest
+         return 2;
+      }
+   case TYPE_F64:
+      return 2;
+   default:
+      return 1;
+   }
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
new file mode 100644
index 00000000000..f96bfbeaa6a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
@@ -0,0 +1,46 @@
+
+#ifndef NV50_IR_TARGET_NVC0_H
+#define NV50_IR_TARGET_NVC0_H
+
+#include "nv50/codegen/nv50_ir_target.h"
+
+namespace nv50_ir {
+
+// Identifiers of the built-in routines.
+// NOTE(review): presumably these are the values accepted by
+// TargetNVC0::getBuiltinOffset() - confirm against its definition.
+#define NVC0_BUILTIN_DIV_U32 0
+#define NVC0_BUILTIN_DIV_S32 1
+#define NVC0_BUILTIN_RCP_F64 2
+#define NVC0_BUILTIN_RSQ_F64 3
+
+// Number of built-in identifiers defined above.
+#define NVC0_BUILTIN_COUNT 4
+
+// Target description for NVC0: answers the code generator's queries about
+// what instructions, operands and modifiers are allowed, and provides rough
+// cost estimates and file layout information.
+class TargetNVC0 : public Target
+{
+public:
+   TargetNVC0(unsigned int chipset);
+
+   virtual CodeEmitter *getCodeEmitter(Program::Type);
+
+   virtual bool runLegalizePass(Program *, CGStage stage) const;
+
+   virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
+
+   // Whether source s of insn can directly use the operand read by ld.
+   virtual bool insnCanLoad(const Instruction *insn, int s,
+                            const Instruction *ld) const;
+   // Whether the operation is supported for the given data type.
+   virtual bool isOpSupported(operation, DataType) const;
+   // Whether source s of the instruction may carry the given modifier.
+   virtual bool isModSupported(const Instruction *, int s, Modifier) const;
+   // Whether the instruction's result may be saturated.
+   virtual bool isSatSupported(const Instruction *) const;
+   // Whether a predicate may be attached to the instruction.
+   virtual bool mayPredicate(const Instruction *, const Value *) const;
+
+   // Rough cost estimates for an instruction.
+   virtual int getLatency(const Instruction *) const;
+   virtual int getThroughput(const Instruction *) const;
+
+   // Size and unit reported for a register/memory file.
+   virtual unsigned int getFileSize(DataFile) const;
+   virtual unsigned int getFileUnit(DataFile) const;
+
+   // Address of a system value; 0xffffffff if it has none.
+   virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;
+
+   uint32_t getBuiltinOffset(int builtin) const;
+
+private:
+   // Fills the opInfo table and the native file map.
+   void initOpInfo();
+
+};
+
+} // namespace nv50_ir
+
+#endif // NV50_IR_TARGET_NVC0_H