diff options
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
15 files changed, 3583 insertions, 3 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/lib/Makefile b/src/gallium/drivers/nouveau/codegen/lib/Makefile index 28a41a3f41e..06d1979d8b2 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/Makefile +++ b/src/gallium/drivers/nouveau/codegen/lib/Makefile @@ -1,6 +1,6 @@ ENVYAS ?= envyas -all: gf100.asm.h gk104.asm.h gk110.asm.h +all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h gf100.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@ @@ -8,3 +8,5 @@ gk104.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@ gk110.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mgk110 $< -o $@ +gm107.asm.h: %.asm.h: %.asm + $(ENVYAS) -a -W -mgm107 $< -o $@ diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm new file mode 100644 index 00000000000..758cc81a159 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm @@ -0,0 +1,115 @@ +.section #gm107_builtin_code +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 36 * 8 bytes (27 instruction slots + 9 sched words) +// +gm107_div_u32: + sched 0x7e0 0x7e0 0x7e0 + flo u32 $r2 $r1 + lop xor 1 $r2 $r2 0x1f + mov $r3 0x1 0xf + sched 0x7e0 0x7e0 0x7e0 + shl $r2 $r3 $r2 + i2i u32 u32 $r1 neg $r1 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + mov $r3 $r0 0xf + imul u32 u32 hi $r0 $r0 $r2 + i2i u32 u32 $r2 neg $r1 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 $r1 $r1 $r0 $r3 + isetp ge u32 and $p0 1 $r1 $r2 1 + $p0 iadd $r1 $r1
neg $r2 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r0 $r0 0x1 + $p0 isetp ge u32 and $p0 1 $r1 $r2 1 + $p0 iadd $r1 $r1 neg $r2 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r0 $r0 0x1 + ret + nop 0 + +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +gm107_div_s32: + sched 0x7e0 0x7e0 0x7e0 + isetp lt and $p2 0x1 $r0 0 1 + isetp lt xor $p3 1 $r1 0 $p2 + i2i s32 s32 $r0 abs $r0 + sched 0x7e0 0x7e0 0x7e0 + i2i s32 s32 $r1 abs $r1 + flo u32 $r2 $r1 + lop xor 1 $r2 $r2 0x1f + sched 0x7e0 0x7e0 0x7e0 + mov $r3 0x1 0xf + shl $r2 $r3 $r2 + i2i u32 u32 $r1 neg $r1 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + mov $r3 $r0 0xf + imul u32 u32 hi $r0 $r0 $r2 + sched 0x7e0 0x7e0 0x7e0 + i2i u32 u32 $r2 neg $r1 + imad u32 u32 $r1 $r1 $r0 $r3 + isetp ge u32 and $p0 1 $r1 $r2 1 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r1 $r1 neg $r2 + $p0 iadd $r0 $r0 0x1 + $p0 isetp ge u32 and $p0 1 $r1 $r2 1 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r1 $r1 neg $r2 + $p0 iadd $r0 $r0 0x1 + $p3 i2i s32 s32 $r0 neg $r0 + sched 0x7e0 0x7e0 0x7e0 + $p2 i2i s32 s32 $r1 neg $r1 + ret + nop 0 + +// STUB +gm107_rcp_f64: +gm107_rsq_f64: + sched 0x7e0 0x7e0 0x7e0 + ret + nop 0 + nop 0 + +.section #gm107_builtin_offsets +.b64 #gm107_div_u32 +.b64 #gm107_div_s32 +.b64 #gm107_rcp_f64 +.b64 #gm107_rsq_f64 diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h new file mode 100644 index 00000000000..7be25da5532 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h @@ -0,0 
+1,97 @@ +uint64_t gm107_builtin_code[] = { +/* 0x0000: gm107_div_u32 */ + 0x001f8000fc0007e0, + 0x5c30000000170002, + 0x3847040001f70202, + 0x3898078000170003, + 0x001f8000fc0007e0, + 0x5c48000000270302, + 0x5ce0200000170a01, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c98078000070003, + 0x5c38008000270000, + 0x5ce0200000170a02, + 0x001f8000fc0007e0, + 0x5a00018000070101, + 0x5b6c038000270107, + 0x5c11000000200101, + 0x001f8000fc0007e0, + 0x3810000000100000, + 0x5b6c038000200107, + 0x5c11000000200101, + 0x001f8000fc0007e0, + 0x3810000000100000, + 0xe32000000007000f, + 0x50b0000000070f00, +/* 0x0120: gm107_div_s32 */ + 0x001f8000fc0007e0, + 0x5b6303800ff70017, + 0x5b6341000ff7011f, + 0x5ce2000000073a00, + 0x001f8000fc0007e0, + 0x5ce2000000173a01, + 0x5c30000000170002, + 0x3847040001f70202, + 0x001f8000fc0007e0, + 0x3898078000170003, + 0x5c48000000270302, + 0x5ce0200000170a01, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c98078000070003, + 0x5c38008000270000, + 0x001f8000fc0007e0, + 0x5ce0200000170a02, + 0x5a00018000070101, + 0x5b6c038000270107, + 0x001f8000fc0007e0, + 0x5c11000000200101, + 0x3810000000100000, + 0x5b6c038000200107, + 0x001f8000fc0007e0, + 0x5c11000000200101, + 0x3810000000100000, + 0x5ce0200000033a00, + 0x001f8000fc0007e0, + 0x5ce0200000123a01, + 0xe32000000007000f, + 0x50b0000000070f00, +/* 0x0280: gm107_rcp_f64 */ +/* 0x0280: gm107_rsq_f64 */ + 0x001f8000fc0007e0, + 0xe32000000007000f, + 0x50b0000000070f00, + 
0x50b0000000070f00, +}; + +uint64_t gm107_builtin_offsets[] = { + 0x0000000000000000, + 0x0000000000000120, + 0x0000000000000280, + 0x0000000000000280, +}; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f082f856ffc..88440309e6b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -157,6 +157,7 @@ enum operation OP_VSHL, OP_VSEL, OP_CCTL, // cache control + OP_SHFL, // warp shuffle OP_LAST }; @@ -223,6 +224,10 @@ enum operation #define NV50_IR_SUBOP_PIXLD_OFFSET 3 #define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4 #define NV50_IR_SUBOP_PIXLD_SAMPLEID 5 +#define NV50_IR_SUBOP_SHFL_IDX 0 +#define NV50_IR_SUBOP_SHFL_UP 1 +#define NV50_IR_SUBOP_SHFL_DOWN 2 +#define NV50_IR_SUBOP_SHFL_BFLY 3 #define NV50_IR_SUBOP_MADSP_SD 0xffff // Yes, we could represent those with DataType. // Or put the type into operation and have a couple 1000 values in that enum. @@ -379,6 +384,7 @@ enum SVSemantic SV_LBASE, SV_SBASE, SV_VERTEX_STRIDE, + SV_INVOCATION_INFO, SV_UNDEFINED, SV_LAST }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index d7a9c2c7d8c..bbb89d97932 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -92,6 +92,7 @@ struct nv50_ir_prog_symbol #define NVISA_GF100_CHIPSET_D0 0xd0 #define NVISA_GK104_CHIPSET 0xe0 #define NVISA_GK110_CHIPSET 0xf0 +#define NVISA_GM107_CHIPSET 0x110 struct nv50_ir_prog_info { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp new file mode 100644 index 00000000000..c01a153c940 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -0,0 +1,2906 @@ +/* + * Copyright 2014 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs <[email protected]> + */ + +#include "codegen/nv50_ir_target_gm107.h" + +namespace nv50_ir { + +class CodeEmitterGM107 : public CodeEmitter +{ +public: + CodeEmitterGM107(const TargetGM107 *); + + virtual bool emitInstruction(Instruction *); + virtual uint32_t getMinEncodingSize(const Instruction *) const; + + virtual void prepareEmission(Program *); + virtual void prepareEmission(Function *); + + inline void setProgramType(Program::Type pType) { progType = pType; } + +private: + const TargetGM107 *targGM107; + + Program::Type progType; + + const Instruction *insn; + const bool writeIssueDelays; + uint32_t *data; + +private: + inline void emitField(uint32_t *, int, int, uint32_t); + inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); } + + inline void emitInsn(uint32_t, bool); + inline void emitInsn(uint32_t o) { emitInsn(o, true); } + inline void emitPred(); + inline void emitGPR(int, const Value *); + inline void emitGPR(int pos) { + emitGPR(pos, (const Value *)NULL); + } + inline void emitGPR(int pos, const ValueRef &ref) { + emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + inline void emitGPR(int pos, const ValueRef *ref) { + emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); + } + inline void emitGPR(int pos, const ValueDef &def) { + emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL); + } + inline void emitSYS(int, const Value *); + inline void emitSYS(int pos, const ValueRef &ref) { + emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + inline void emitPRED(int, const Value *); + inline void emitPRED(int pos) { + emitPRED(pos, (const Value *)NULL); + } + inline void emitPRED(int pos, const ValueRef &ref) { + emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + inline void emitPRED(int pos, const ValueDef &def) { + emitPRED(pos, def.get() ? 
def.rep() : (const Value *)NULL); + } + inline void emitADDR(int, int, int, int, const ValueRef &); + inline void emitCBUF(int, int, int, int, int, const ValueRef &); + inline bool longIMMD(const ValueRef &); + inline void emitIMMD(int, int, const ValueRef &); + + void emitCond3(int, CondCode); + void emitCond4(int, CondCode); + void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); } + inline void emitO(int); + inline void emitP(int); + inline void emitSAT(int); + inline void emitCC(int); + inline void emitX(int); + inline void emitABS(int, const ValueRef &); + inline void emitNEG(int, const ValueRef &); + inline void emitNEG2(int, const ValueRef &, const ValueRef &); + inline void emitFMZ(int, int); + inline void emitRND(int, RoundMode, int); + inline void emitRND(int pos) { + emitRND(pos, insn->rnd, -1); + } + inline void emitPDIV(int); + inline void emitINV(int, const ValueRef &); + + void emitEXIT(); + void emitBRA(); + void emitCAL(); + void emitPCNT(); + void emitCONT(); + void emitPBK(); + void emitBRK(); + void emitPRET(); + void emitRET(); + void emitSSY(); + void emitSYNC(); + void emitSAM(); + void emitRAM(); + + void emitMOV(); + void emitS2R(); + void emitF2F(); + void emitF2I(); + void emitI2F(); + void emitI2I(); + void emitSHFL(); + + void emitDADD(); + void emitDMUL(); + void emitDFMA(); + void emitDMNMX(); + void emitDSET(); + void emitDSETP(); + + void emitFADD(); + void emitFMUL(); + void emitFFMA(); + void emitMUFU(); + void emitFMNMX(); + void emitRRO(); + void emitFCMP(); + void emitFSET(); + void emitFSETP(); + void emitFSWZADD(); + + void emitLOP(); + void emitNOT(); + void emitIADD(); + void emitIMUL(); + void emitIMAD(); + void emitIMNMX(); + void emitICMP(); + void emitISET(); + void emitISETP(); + void emitSHL(); + void emitSHR(); + void emitPOPC(); + void emitBFI(); + void emitBFE(); + void emitFLO(); + + void emitLDSTs(int, DataType); + void emitLDSTc(int); + void emitLDC(); + void emitLDL(); + void emitLDS(); + void emitLD(); + 
void emitSTL(); + void emitSTS(); + void emitST(); + void emitALD(); + void emitAST(); + void emitISBERD(); + void emitIPA(); + + void emitPIXLD(); + + void emitTEXs(int); + void emitTEX(); + void emitTLD(); + void emitTLD4(); + void emitTXD(); + void emitTXQ(); + void emitTMML(); + void emitDEPBAR(); + + void emitNOP(); + void emitKIL(); + void emitOUT(); +}; + +/******************************************************************************* + * general instruction layout/fields + ******************************************************************************/ + +void +CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v) +{ + if (b >= 0) { + uint32_t m = ((1ULL << s) - 1); + uint64_t d = (uint64_t)(v & m) << b; + assert(!(v & ~m) || (v & ~m) == ~m); + data[1] |= d >> 32; + data[0] |= d; + } +} + +void +CodeEmitterGM107::emitPred() +{ + if (insn->predSrc >= 0) { + emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); + emitField(19, 1, insn->cc == CC_NOT_P); + } else { + emitField(16, 3, 7); + } +} + +void +CodeEmitterGM107::emitInsn(uint32_t hi, bool pred) +{ + code[0] = 0x00000000; + code[1] = hi; + if (pred) + emitPred(); +} + +void +CodeEmitterGM107::emitGPR(int pos, const Value *val) +{ + emitField(pos, 8, val ? val->reg.data.id : 255); +} + +void +CodeEmitterGM107::emitSYS(int pos, const Value *val) +{ + int id = val ? val->reg.data.id : -1; + + switch (id) { + case SV_INVOCATION_ID : id = 0x11; break; + case SV_INVOCATION_INFO: id = 0x1d; break; + default: + assert(!"invalid system value"); + id = 0; + break; + } + + emitField(pos, 8, id); +} + +void +CodeEmitterGM107::emitPRED(int pos, const Value *val) +{ + emitField(pos, 3, val ? 
val->reg.data.id : 7); /* id 7 is emitted when val is NULL */ +} + +void +CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr, + const ValueRef &ref) +{ /* Emit ref's offset >> shr as a len-bit field at bit off; when gpr >= 0 the indirect register is also emitted at bit gpr. */ + const Value *v = ref.get(); + assert(!(v->reg.data.offset & ((1 << shr) - 1))); /* the bits dropped by the shift must be zero */ + if (gpr >= 0) + emitGPR(gpr, ref.getIndirect(0)); + emitField(off, len, v->reg.data.offset >> shr); +} + +void +CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr, + const ValueRef &ref) +{ /* Emit a constant-buffer operand: reg.fileIndex as 5 bits at bit buf, the indirect register at bit gpr when gpr >= 0, and the offset >> shr at bit off. */ + const Value *v = ref.get(); + const Symbol *s = v->asSym(); + + assert(!(s->reg.data.offset & ((1 << shr) - 1))); + + emitField(buf, 5, v->reg.fileIndex); + if (gpr >= 0) + emitGPR(gpr, ref.getIndirect(0)); + emitField(off, 16, s->reg.data.offset >> shr); /* NOTE(review): len is accepted but never used, the offset field is always 16 bits wide; every call visible here passes 16 -- confirm before relying on len */ +} + +bool +CodeEmitterGM107::longIMMD(const ValueRef &ref) +{ /* True when the immediate ref cannot use the short form written by emitIMMD(pos, 19, ...): a float with any of its low 12 bits set, or an integer whose bits 19-31 are not all equal. Anything that is not an immediate returns false. */ + if (ref.getFile() == FILE_IMMEDIATE) { + const ImmediateValue *imm = ref.get()->asImm(); + if (isFloatType(insn->sType)) { + if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000) + return true; + } else { + if ((imm->reg.data.u32 & 0xfff80000) != 0x00000000 && /* the mask includes bit 19: emitIMMD stores bit 19 as the top bit of a 20-bit value, so 0x00080000 or 0xfff00000 must not be classified as short */ + (imm->reg.data.u32 & 0xfff80000) != 0xfff80000) + return true; + } + } + return false; +} + +void +CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref) +{ /* len == 19: emit bits 0-18 at pos and bit 19 at bit 56 (floats are first shifted right by 12); any other len is emitted verbatim at pos. */ + const ImmediateValue *imm = ref.get()->asImm(); + uint32_t val = imm->reg.data.u32; /* NOTE(review): only the low 32 bits are read -- confirm whether 64-bit float immediates can reach this path */ + + if (len == 19) { + if (isFloatType(insn->sType)) { + assert(!(val & 0x00000fff)); + val >>= 12; + } + assert(isFloatType(insn->sType) || !(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000); /* integers must have bits 19-31 all equal, matching longIMMD; a shifted float is at most 20 bits wide and uses bit 19 freely */ + emitField( 56, 1, (val & 0x80000) >> 19); + emitField(pos, len, (val & 0x7ffff)); + } else { + emitField(pos, len, val); + } +} + +/******************************************************************************* + * modifiers + ******************************************************************************/ + +void +CodeEmitterGM107::emitCond3(int pos, CondCode code) +{ /* Emit a 3-bit condition code at pos; each CC_xxU code shares the value of the matching CC_xx code. */ + int data = 0; + + switch (code) { + case CC_FL : data = 0x00; break; + case CC_LTU: + case CC_LT : data = 0x01; break; + case CC_EQU: + case CC_EQ : data = 0x02; break; + case CC_LEU: + case CC_LE
: data = 0x03; break; + case CC_GTU: + case CC_GT : data = 0x04; break; + case CC_NEU: + case CC_NE : data = 0x05; break; + case CC_GEU: + case CC_GE : data = 0x06; break; + case CC_TR : data = 0x07; break; + default: + assert(!"invalid cond3"); + break; + } + + emitField(pos, 3, data); +} + +void +CodeEmitterGM107::emitCond4(int pos, CondCode code) +{ + int data = 0; + + switch (code) { + case CC_FL: data = 0x00; break; + case CC_LT: data = 0x01; break; + case CC_EQ: data = 0x02; break; + case CC_LE: data = 0x03; break; + case CC_GT: data = 0x04; break; + case CC_NE: data = 0x05; break; + case CC_GE: data = 0x06; break; +// case CC_NUM: data = 0x07; break; +// case CC_NAN: data = 0x08; break; + case CC_LTU: data = 0x09; break; + case CC_EQU: data = 0x0a; break; + case CC_LEU: data = 0x0b; break; + case CC_GTU: data = 0x0c; break; + case CC_NEU: data = 0x0d; break; + case CC_GEU: data = 0x0e; break; + case CC_TR: data = 0x0f; break; + default: + assert(!"invalid cond4"); + break; + } + + emitField(pos, 4, data); +} + +void +CodeEmitterGM107::emitO(int pos) +{ + emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); +} + +void +CodeEmitterGM107::emitP(int pos) +{ + emitField(pos, 1, insn->perPatch); +} + +void +CodeEmitterGM107::emitSAT(int pos) +{ + emitField(pos, 1, insn->saturate); +} + +void +CodeEmitterGM107::emitCC(int pos) +{ + emitField(pos, 1, insn->defExists(1)); +} + +void +CodeEmitterGM107::emitX(int pos) +{ + emitField(pos, 1, insn->flagsSrc >= 0); +} + +void +CodeEmitterGM107::emitABS(int pos, const ValueRef &ref) +{ + emitField(pos, 1, ref.mod.abs()); +} + +void +CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref) +{ + emitField(pos, 1, ref.mod.neg()); +} + +void +CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b) +{ + emitField(pos, 1, a.mod.neg() ^ b.mod.neg()); +} + +void +CodeEmitterGM107::emitFMZ(int pos, int len) +{ + emitField(pos, len, /*XXX: insn->dnz << 1 | */ insn->ftz); +} + +void 
+CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip) +{ /* Encode rounding mode rnd: a 2-bit mode at bit rmp and a 1-bit flag at bit rip that is set for the ROUND_*I variants. */ + int rm = 0, ri = 0; + switch (rnd) { + case ROUND_NI: ri = 1; /* fall through */ + case ROUND_N : rm = 0; break; + case ROUND_MI: ri = 1; /* fall through */ + case ROUND_M : rm = 1; break; + case ROUND_PI: ri = 1; /* fall through */ + case ROUND_P : rm = 2; break; + case ROUND_ZI: ri = 1; /* fall through */ + case ROUND_Z : rm = 3; break; + default: + assert(!"invalid round mode"); + break; + } + emitField(rip, 1, ri); /* emitField does nothing for a negative bit position, so rip == -1 skips the flag */ + emitField(rmp, 2, rm); +} + +void +CodeEmitterGM107::emitPDIV(int pos) +{ /* Encode insn->postFactor as 3 bits at pos: 0, -1, -2, -3 map to 0, 1, 2, 3 and 1, 2, 3 map to 6, 5, 4. */ + assert(insn->postFactor >= -3 && insn->postFactor <= 3); + if (insn->postFactor > 0) + emitField(pos, 3, 7 - insn->postFactor); + else + emitField(pos, 3, 0 - insn->postFactor); +} + +void +CodeEmitterGM107::emitINV(int pos, const ValueRef &ref) +{ /* Set the bit at pos when ref carries the NV50_IR_MOD_NOT modifier. */ + emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); +} + +/******************************************************************************* + * control flow + ******************************************************************************/ + +void +CodeEmitterGM107::emitEXIT() +{ /* Opcode word 0xe3000000 with the CC_TR condition code at bit 0. */ + emitInsn (0xe3000000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitBRA() +{ /* Emit a branch or jump; the opcode is chosen from the indirect and absolute flags of the flow instruction. */ + const FlowInstruction *insn = this->insn->asFlow(); + int gpr = -1; + + if (insn->indirect) { + if (insn->absolute) + emitInsn(0xe2000000); // JMX + else + emitInsn(0xe2500000); // BRX + gpr = 0x08; + } else { + if (insn->absolute) + emitInsn(0xe2100000); // JMP + else + emitInsn(0xe2400000); // BRA + emitField(0x07, 1, insn->allWarp); + } + + emitField(0x06, 1, insn->limit); + emitCond5(0x00, CC_TR); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + if (!insn->absolute) + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); /* relative target, measured from codeSize + 8 */ + else + emitField(0x14, 32, insn->target.bb->binPos); /* absolute target */ + } else { + emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0)); /* src0 is in constant memory: emit it as a constant-buffer operand instead of a target offset */ + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitCAL() +{ /* Emit a call: JCAL when absolute, CAL otherwise. emitInsn is called with pred == 0, so emitPred() is not run. */ + const FlowInstruction *insn = this->insn->asFlow(); + + if (insn->absolute) { + emitInsn(0xe2200000, 0); // JCAL +
} else { + emitInsn(0xe2600000, 0); // CAL + } + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + if (!insn->absolute) + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + else { + if (insn->builtin) { + int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin); + addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20); + addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12); + } else { + emitField(0x14, 32, insn->target.bb->binPos); + } + } + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitPCNT() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2b00000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitCONT() +{ + emitInsn (0xe3500000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitPBK() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2a00000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitBRK() +{ + emitInsn (0xe3400000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitPRET() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2700000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitRET() +{ + emitInsn (0xe3200000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitSSY() +{ + 
const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2900000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitSYNC() +{ + emitInsn (0xf0f80000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitSAM() +{ + emitInsn(0xe3700000, 0); +} + +void +CodeEmitterGM107::emitRAM() +{ + emitInsn(0xe3800000, 0); +} + +/******************************************************************************* + * predicate/cc + ******************************************************************************/ + +/******************************************************************************* + * movement / conversion + ******************************************************************************/ + +void +CodeEmitterGM107::emitMOV() +{ + if ( insn->src(0).getFile() != FILE_IMMEDIATE || + (insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) { + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c980000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c980000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38980000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + emitField(0x27, 4, insn->lanes); + } else { + emitInsn (0x01000000); + emitIMMD (0x14, 32, insn->src(0)); + emitField(0x0c, 4, insn->lanes); + } + + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitS2R() +{ + emitInsn(0xf0c80000); + emitSYS (0x14, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitF2F() +{ + RoundMode rnd = insn->rnd; + + switch (insn->op) { + case OP_FLOOR: rnd = ROUND_MI; break; + case OP_CEIL : rnd = ROUND_PI; break; + case OP_TRUNC: rnd = ROUND_ZI; break; + default: + break; + } + + 
switch (insn->src(0).getFile()) { /* the opcode word depends on the file src0 lives in */ + case FILE_GPR: + emitInsn(0x5ca80000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4ca80000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38a80000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate); + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitFMZ (0x2c, 1); + emitRND (0x27, rnd, 0x2a); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); /* source size, the same slot emitF2I, emitI2F and emitI2I use */ + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); /* destination size; these two fields were swapped, which is invisible when both types are 32-bit but wrong for any size-changing conversion */ + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitF2I() +{ /* Float-to-integer conversion; OP_FLOOR, OP_CEIL and OP_TRUNC override the rounding mode with ROUND_M, ROUND_P and ROUND_Z. */ + RoundMode rnd = insn->rnd; + + switch (insn->op) { + case OP_FLOOR: rnd = ROUND_M; break; + case OP_CEIL : rnd = ROUND_P; break; + case OP_TRUNC: rnd = ROUND_Z; break; + default: + break; + } + + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5cb00000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4cb00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38b00000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitFMZ (0x2c, 1); + emitRND (0x27, rnd, 0x2a); + emitField(0x0c, 1, isSignedType(insn->dType)); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitI2F() +{ /* Integer-to-float conversion. */ + RoundMode rnd = insn->rnd; + + switch (insn->op) { + case OP_FLOOR: rnd = ROUND_M; break; + case OP_CEIL : rnd = ROUND_P; break; + case
OP_TRUNC: rnd = ROUND_Z; break; + default: + break; + } + + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5cb80000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4cb80000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38b80000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitRND (0x27, rnd, -1); + emitField(0x0d, 1, isSignedType(insn->sType)); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitI2I() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5ce00000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4ce00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38e00000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitSAT (0x32); + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitField(0x0d, 1, isSignedType(insn->sType)); + emitField(0x0c, 1, isSignedType(insn->dType)); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitSHFL() +{ + int type = 0; + + emitInsn (0xef100000); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitGPR(0x14, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitIMMD(0x14, 5, insn->src(1)); + type |= 1; + break; + default: + assert(!"invalid src1 file"); + break; + } + + /*XXX: what is 
this arg? hardcode immediate for now */ + emitField(0x22, 13, 0x1c03); + type |= 2; + + emitPRED (0x30); + emitField(0x1e, 2, insn->subOp); + emitField(0x1c, 2, type); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * double + ******************************************************************************/ + +void +CodeEmitterGM107::emitDADD() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c700000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c700000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38700000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitABS(0x31, insn->src(1)); + emitNEG(0x30, insn->src(0)); + emitCC (0x2f); + emitABS(0x2e, insn->src(0)); + emitNEG(0x2d, insn->src(1)); + + if (insn->op == OP_SUB) + code[1] ^= 0x00002000; + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDMUL() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c680000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c680000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38680000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + emitRND (0x27); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDFMA() +{ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b700000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b700000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + 
emitInsn(0x36700000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53700000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitRND (0x32); + emitNEG (0x31, insn->src(2)); + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDMNMX() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c500000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c500000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38500000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitABS (0x31, insn->src(1)); + emitNEG (0x30, insn->src(0)); + emitCC (0x2f); + emitABS (0x2e, insn->src(0)); + emitNEG (0x2d, insn->src(1)); + emitField(0x2a, 1, insn->op == OP_MAX); + emitPRED (0x27); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDSET() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x59000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x49000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x32000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); 
+ } + + emitABS (0x36, insn->src(0)); + emitNEG (0x35, insn->src(1)); + emitCond4(0x30, insn->setCond); + emitCC (0x2f); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDSETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b800000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b800000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36800000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond4(0x30, insn->setCond); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitABS (0x07, insn->src(0)); + emitNEG (0x06, insn->src(1)); + emitPRED (0x03, insn->def(0)); + if (insn->defExists(1)) + emitPRED(0x00, insn->def(1)); + else + emitPRED(0x00); +} + +/******************************************************************************* + * float + ******************************************************************************/ + +void +CodeEmitterGM107::emitFADD() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c580000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c580000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38580000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitSAT(0x32); + 
emitABS(0x31, insn->src(1)); + emitNEG(0x30, insn->src(0)); + emitCC (0x2f); + emitABS(0x2e, insn->src(0)); + emitNEG(0x2d, insn->src(1)); + emitFMZ(0x2c, 1); + } else { + emitInsn(0x08000000); + emitABS(0x39, insn->src(1)); + emitNEG(0x38, insn->src(0)); + emitFMZ(0x37, 1); + emitABS(0x36, insn->src(0)); + emitNEG(0x35, insn->src(1)); + emitCC (0x34); + emitIMMD(0x14, 32, insn->src(1)); + } + + if (insn->op == OP_SUB) + code[1] ^= 0x00002000; + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFMUL() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c680000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c680000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38680000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitSAT (0x32); + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + emitFMZ (0x2c, 2); + emitPDIV(0x29); + emitRND (0x27); + } else { + emitInsn(0x1e000000); + emitSAT (0x37); + emitFMZ (0x35, 2); + emitCC (0x34); + emitIMMD(0x14, 32, insn->src(1)); + if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg()) + code[1] ^= 0x00080000; /* flip immd sign bit */ + } + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFFMA() +{ + /*XXX: ffma32i exists, but not using it as third src overlaps dst */ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x59800000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x49800000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x32800000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case 
FILE_MEMORY_CONST: + emitInsn(0x51800000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + emitRND (0x33); + emitSAT (0x32); + emitNEG (0x31, insn->src(2)); + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + + emitFMZ(0x35, 2); + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitMUFU() +{ + int mufu = 0; + + switch (insn->op) { + case OP_COS: mufu = 0; break; + case OP_SIN: mufu = 1; break; + case OP_EX2: mufu = 2; break; + case OP_LG2: mufu = 3; break; + case OP_RCP: mufu = 4; break; + case OP_RSQ: mufu = 5; break; + default: + assert(!"invalid mufu"); + break; + } + + emitInsn (0x50800000); + emitSAT (0x32); + emitNEG (0x30, insn->src(0)); + emitABS (0x2e, insn->src(0)); + emitField(0x14, 3, mufu); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFMNMX() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c600000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c600000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38600000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x2a, 1, insn->op == OP_MAX); + emitPRED (0x27); + + emitABS(0x31, insn->src(1)); + emitNEG(0x30, insn->src(0)); + emitCC (0x2f); + emitABS(0x2e, insn->src(0)); + emitNEG(0x2d, insn->src(1)); + emitFMZ(0x2c, 1); + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitRRO() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c900000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c900000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38900000); + emitIMMD(0x14, 19, insn->src(0)); + break; + 
default: + assert(!"bad src file"); + break; + } + + emitABS (0x31, insn->src(0)); + emitNEG (0x2d, insn->src(0)); + emitField(0x27, 1, insn->op == OP_PREEX2); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFCMP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + CondCode cc = insn->setCond; + + if (insn->src(2).mod.neg()) + cc = reverseCondCode(cc); + + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5ba00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4ba00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36a00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53a00000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitCond4(0x30, cc); + emitFMZ (0x2f, 1); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFSET() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x58000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x48000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x30000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitFMZ (0x37, 1); + emitABS (0x36, insn->src(0)); + emitNEG 
(0x35, insn->src(1)); + emitField(0x34, 1, insn->dType == TYPE_F32); + emitCond4(0x30, insn->setCond); + emitCC (0x2f); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFSETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5bb00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4bb00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36b00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond4(0x30, insn->setCond); + emitFMZ (0x2f, 1); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitABS (0x07, insn->src(0)); + emitNEG (0x06, insn->src(1)); + emitPRED (0x03, insn->def(0)); + if (insn->defExists(1)) + emitPRED(0x00, insn->def(1)); + else + emitPRED(0x00); +} + +void +CodeEmitterGM107::emitFSWZADD() +{ + emitInsn (0x50f80000); + emitCC (0x2f); + emitFMZ (0x2c, 1); + emitRND (0x27); + emitField(0x26, 1, insn->lanes); /* abused for .ndv */ + emitField(0x1c, 8, insn->subOp); + emitGPR (0x14, insn->src(1)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * integer + ******************************************************************************/ + +void +CodeEmitterGM107::emitLOP() +{ + int lop = 0; + + switch (insn->op) { + case OP_AND: lop = 0; break; + case OP_OR : lop = 1; 
break; + case OP_XOR: lop = 2; break; + default: + assert(!"invalid lop"); + break; + } + + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c400000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c400000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38400000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitPRED (0x30); + emitField(0x29, 2, lop); + emitINV (0x28, insn->src(1)); + emitINV (0x27, insn->src(0)); + } else { + emitInsn (0x04000000); + emitINV (0x38, insn->src(1)); + emitINV (0x37, insn->src(0)); + emitField(0x35, 2, lop); + emitIMMD (0x14, 32, insn->src(1)); + } + + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* special-case of emitLOP(): lop pass_b dst 0 ~src; the single operand is src(0) in every form, including the 32-bit immediate one */ +void +CodeEmitterGM107::emitNOT() +{ + if (!longIMMD(insn->src(0))) { + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c400700); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c400700); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38400700); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitPRED (0x30); + } else { + emitInsn (0x05600000); + emitIMMD (0x14, 32, insn->src(0)); /* was src(1): NOT has no second source */ + } + + emitGPR(0x08); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitIADD() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c100000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c100000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38100000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitSAT(0x32); + emitNEG(0x31, insn->src(0)); + emitNEG(0x30, 
insn->src(1)); + emitCC (0x2f); + } else { + emitInsn(0x1c000000); + emitSAT (0x36); + emitCC (0x34); + emitIMMD(0x14, 32, insn->src(1)); + } + + if (insn->op == OP_SUB) + code[1] ^= 0x00010000; + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitIMUL() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c380000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c380000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38380000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitCC (0x2f); + emitField(0x29, 1, isSignedType(insn->sType)); + emitField(0x28, 1, isSignedType(insn->dType)); + emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); + } else { + emitInsn (0x1f000000); + emitField(0x37, 1, isSignedType(insn->sType)); + emitField(0x36, 1, isSignedType(insn->dType)); + emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); + emitCC (0x34); + emitIMMD (0x14, 32, insn->src(1)); + } + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitIMAD() +{ + /*XXX: imad32i exists, but not using it as third src overlaps dst */ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5a000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4a000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x34000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x52000000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitField(0x36, 1, 
insn->subOp == NV50_IR_SUBOP_MUL_HIGH); + emitField(0x35, 1, isSignedType(insn->sType)); + emitNEG (0x34, insn->src(2)); + emitNEG2 (0x33, insn->src(0), insn->src(1)); + emitSAT (0x32); + emitX (0x31); + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitIMNMX() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c200000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c200000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38200000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitField(0x2a, 1, insn->op == OP_MAX); + emitPRED (0x27); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitICMP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + CondCode cc = insn->setCond; + + if (insn->src(2).mod.neg()) + cc = reverseCondCode(cc); + + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b400000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b400000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36400000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53400000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitCond3(0x31, cc); + emitField(0x30, 1, isSignedType(insn->sType)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitISET() +{ + const CmpInstruction *insn = 
this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b500000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b500000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36500000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond3(0x31, insn->setCond); + emitField(0x30, 1, isSignedType(insn->sType)); + emitCC (0x2f); + emitX (0x2b); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitISETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b600000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b600000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36600000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond3(0x31, insn->setCond); + emitField(0x30, 1, isSignedType(insn->sType)); + emitX (0x2b); + emitGPR (0x08, insn->src(0)); + emitPRED (0x03, insn->def(0)); + if (insn->defExists(1)) + emitPRED(0x00, insn->def(1)); + else + emitPRED(0x00); +} + +void +CodeEmitterGM107::emitSHL() +{ + switch 
(insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c480000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c480000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38480000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitCC (0x2f); + emitX (0x2b); + emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitSHR() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c280000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c280000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38280000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitX (0x2c); + emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitPOPC() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c080000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c080000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38080000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitINV(0x28, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitBFI() +{ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5bf00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4bf00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36f00000); + 
emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53f00000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitCC (0x2f); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitBFE() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitFLO() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c300000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c300000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38300000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); + emitINV (0x28, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * memory + ******************************************************************************/ + +void +CodeEmitterGM107::emitLDSTs(int pos, DataType type) +{ + int data = 0; + + switch (typeSizeof(type)) { + case 1: data = isSignedType(type) ? 
1 : 0; break; + case 2: data = isSignedType(type) ? 3 : 2; break; + case 4: data = 4; break; + case 8: data = 5; break; + case 16: data = 6; break; + default: + assert(!"bad type"); + break; + } + + emitField(pos, 3, data); +} + +void +CodeEmitterGM107::emitLDSTc(int pos) +{ + int mode = 0; + + switch (insn->cache) { + case CACHE_CA: mode = 0; break; + case CACHE_CG: mode = 1; break; + case CACHE_CS: mode = 2; break; + case CACHE_CV: mode = 3; break; + default: + assert(!"invalid caching mode"); + break; + } + + emitField(pos, 2, mode); +} + +void +CodeEmitterGM107::emitLDC() +{ + emitInsn (0xef900000); + emitLDSTs(0x30, insn->dType); + emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitLDL() +{ + emitInsn (0xef400000); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2c); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitLDS() +{ + emitInsn (0xef480000); + emitLDSTs(0x30, insn->dType); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitLD() +{ + emitInsn (0x80000000); + emitPRED (0x3a); + emitLDSTc(0x38); + emitLDSTs(0x35, insn->dType); + emitADDR (0x08, 0x14, 32, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitSTL() +{ + emitInsn (0xef500000); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2c); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +void +CodeEmitterGM107::emitSTS() +{ + emitInsn (0xef580000); + emitLDSTs(0x30, insn->dType); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +void +CodeEmitterGM107::emitST() +{ + emitInsn (0xa0000000); + emitPRED (0x3a); + emitLDSTc(0x38); + emitLDSTs(0x35, insn->dType); + emitADDR (0x08, 0x14, 32, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +void +CodeEmitterGM107::emitALD() +{ + emitInsn (0xefd80000); + emitField(0x2f, 2, 
(insn->getDef(0)->reg.size / 4) - 1); + emitGPR (0x27, insn->src(0).getIndirect(1)); + emitO (0x20); + emitP (0x1f); + emitADDR (0x08, 20, 10, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitAST() +{ + emitInsn (0xeff00000); + emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1); + emitGPR (0x27, insn->src(0).getIndirect(1)); + emitP (0x1f); + emitADDR (0x08, 20, 10, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +void +CodeEmitterGM107::emitISBERD() +{ + emitInsn(0xefd00000); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitIPA() +{ + int ipam = 0, ipas = 0; + + switch (insn->getInterpMode()) { + case NV50_IR_INTERP_LINEAR : ipam = 0; break; + case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break; + case NV50_IR_INTERP_FLAT : ipam = 2; break; + case NV50_IR_INTERP_SC : ipam = 3; break; + default: + assert(!"invalid ipa mode"); + break; + } + + switch (insn->getSampleMode()) { + case NV50_IR_INTERP_DEFAULT : ipas = 0; break; + case NV50_IR_INTERP_CENTROID: ipas = 1; break; + case NV50_IR_INTERP_OFFSET : ipas = 2; break; + default: + assert(!"invalid ipa sample mode"); + break; + } + + emitInsn (0xe0000000); + emitField(0x36, 2, ipam); + emitField(0x34, 2, ipas); + emitSAT (0x33); + emitField(0x2f, 3, 7); + emitADDR (0x08, 0x1c, 10, 0, insn->src(0)); + if ((code[0] & 0x0000ff00) != 0x0000ff00) + code[1] |= 0x00000040; /* .idx */ + emitGPR(0x00, insn->def(0)); + + if (insn->op == OP_PINTERP) { + emitGPR(0x14, insn->src(1)); + if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) + emitGPR(0x27, insn->src(2)); + } else { + if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) + emitGPR(0x27, insn->src(1)); + emitGPR(0x14); + } + + if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) + emitGPR(0x27); +} + +/******************************************************************************* + * surface + ******************************************************************************/ + +void 
+CodeEmitterGM107::emitPIXLD() +{ + emitInsn (0xefe80000); + emitPRED (0x2d); + emitField(0x1f, 3, insn->subOp); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * texture + ******************************************************************************/ + +void +CodeEmitterGM107::emitTEXs(int pos) +{ + int src1 = insn->predSrc == 1 ? 2 : 1; + if (insn->srcExists(src1)) + emitGPR(pos, insn->src(src1)); + else + emitGPR(pos); +} + +void +CodeEmitterGM107::emitTEX() +{ + const TexInstruction *insn = this->insn->asTex(); + int lodm = 0; + + if (!insn->tex.levelZero) { + switch (insn->op) { + case OP_TEX: lodm = 0; break; + case OP_TXB: lodm = 2; break; + case OP_TXL: lodm = 3; break; + default: + assert(!"invalid tex op"); + break; + } + } else { + lodm = 1; + } + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdeb80000); + emitField(0x35, 2, lodm); + emitField(0x24, 1, insn->tex.useOffsets == 1); + } else { + emitInsn (0xc0380000); + emitField(0x37, 2, lodm); + emitField(0x36, 1, insn->tex.useOffsets == 1); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x32, 1, insn->tex.target.isShadow()); + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.derivAll); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitTLD() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdd380000); + } else { + emitInsn (0xdc380000); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x37, 1, insn->tex.levelZero == 0); + emitField(0x32, 1, insn->tex.target.isMS()); + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.useOffsets == 1); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitTLD4() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdef80000); + emitField(0x26, 2, insn->tex.gatherComp); + emitField(0x25, 2, insn->tex.useOffsets == 4); + emitField(0x24, 2, insn->tex.useOffsets == 1); + } else { + emitInsn (0xc8380000); + emitField(0x38, 2, insn->tex.gatherComp); + emitField(0x37, 2, insn->tex.useOffsets == 4); + emitField(0x36, 2, insn->tex.useOffsets == 1); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x32, 1, insn->tex.target.isShadow()); + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.derivAll); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitTXD() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xde780000); + } else { + emitInsn (0xde380000); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.useOffsets == 1); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitTMML() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdf600000); + } else { + emitInsn (0xdf580000); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.derivAll); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitTXQ() +{ + const TexInstruction *insn = this->insn->asTex(); + int type = 0; + + switch (insn->tex.query) { + case TXQ_DIMS : type = 0x01; break; + case TXQ_TYPE : type = 0x02; break; + case TXQ_SAMPLE_POSITION: type = 0x05; break; + case TXQ_FILTER : type = 0x10; break; + case TXQ_LOD : type = 0x12; break; + case TXQ_WRAP : type = 0x14; break; + case TXQ_BORDER_COLOUR : type = 0x16; break; + default: + assert(!"invalid txq query"); + break; + } + + emitInsn (0xdf4a0000); + emitField(0x24, 13, insn->tex.r); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x16, 6, type); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDEPBAR() +{ + emitInsn (0xf0f00000); + emitField(0x1d, 1, 1); /* le */ + emitField(0x1a, 3, 5); + emitField(0x14, 6, insn->subOp); + emitField(0x00, 6, insn->subOp); +} + +/******************************************************************************* + * misc + ******************************************************************************/ + +void +CodeEmitterGM107::emitNOP() +{ + emitInsn(0x50b00000); +} + +void +CodeEmitterGM107::emitKIL() +{ + emitInsn (0xe3300000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitOUT() +{ + const int cut = insn->op == OP_RESTART || insn->subOp; + const int emit = insn->op == OP_EMIT; + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0xfbe00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0xf6e00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0xebe00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x27, 2, (cut << 1) | emit); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, 
insn->def(0)); +} + +/******************************************************************************* + * assembler front-end + ******************************************************************************/ + +bool +CodeEmitterGM107::emitInstruction(Instruction *i) +{ + const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8; + bool ret = true; + + insn = i; + + if (insn->encSize != 8) { + ERROR("skipping undecodable instruction: "); insn->print(); + return false; + } else + if (codeSize + size > codeSizeLimit) { + ERROR("code emitter output buffer too small\n"); + return false; + } + + if (writeIssueDelays) { + int n = ((codeSize & 0x1f) / 8) - 1; + if (n < 0) { + data = code; + data[0] = 0x00000000; + data[1] = 0x00000000; + code += 2; + codeSize += 8; + n++; + } + + emitField(data, n * 21, 21, insn->sched); + } + + switch (insn->op) { + case OP_EXIT: + emitEXIT(); + break; + case OP_BRA: + emitBRA(); + break; + case OP_CALL: + emitCAL(); + break; + case OP_PRECONT: + emitPCNT(); + break; + case OP_CONT: + emitCONT(); + break; + case OP_PREBREAK: + emitPBK(); + break; + case OP_BREAK: + emitBRK(); + break; + case OP_PRERET: + emitPRET(); + break; + case OP_RET: + emitRET(); + break; + case OP_JOINAT: + emitSSY(); + break; + case OP_JOIN: + emitSYNC(); + break; + case OP_QUADON: + emitSAM(); + break; + case OP_QUADPOP: + emitRAM(); + break; + case OP_MOV: + if (insn->def(0).getFile() == FILE_GPR && + insn->src(0).getFile() != FILE_PREDICATE) + emitMOV(); + else + assert(!"R2P/P2R"); + break; + case OP_RDSV: + emitS2R(); + break; + case OP_ABS: + case OP_NEG: + case OP_SAT: + case OP_FLOOR: + case OP_CEIL: + case OP_TRUNC: + case OP_CVT: + if (isFloatType(insn->dType)) { + if (isFloatType(insn->sType)) + emitF2F(); + else + emitI2F(); + } else { + if (isFloatType(insn->sType)) + emitF2I(); + else + emitI2I(); + } + break; + case OP_SHFL: + emitSHFL(); + break; + case OP_ADD: + case OP_SUB: + if (isFloatType(insn->dType)) { + if (insn->dType == 
TYPE_F64) + emitDADD(); + else + emitFADD(); + } else { + emitIADD(); + } + break; + case OP_MUL: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F64) + emitDMUL(); + else + emitFMUL(); + } else { + emitIMUL(); + } + break; + case OP_MAD: + case OP_FMA: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F64) + emitDFMA(); + else + emitFFMA(); + } else { + emitIMAD(); + } + break; + case OP_MIN: + case OP_MAX: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F64) + emitDMNMX(); + else + emitFMNMX(); + } else { + emitIMNMX(); + } + break; + case OP_SHL: + emitSHL(); + break; + case OP_SHR: + emitSHR(); + break; + case OP_POPCNT: + emitPOPC(); + break; + case OP_INSBF: + emitBFI(); + break; + case OP_EXTBF: + emitBFE(); + break; + case OP_BFIND: + emitFLO(); + break; + case OP_SLCT: + if (isFloatType(insn->dType)) + emitFCMP(); + else + emitICMP(); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + if (insn->def(0).getFile() != FILE_PREDICATE) { + if (isFloatType(insn->sType)) + if (insn->sType == TYPE_F64) + emitDSET(); + else + emitFSET(); + else + emitISET(); + } else { + if (isFloatType(insn->sType)) + if (insn->sType == TYPE_F64) + emitDSETP(); + else + emitFSETP(); + else + emitISETP(); + } + break; + case OP_PRESIN: + case OP_PREEX2: + emitRRO(); + break; + case OP_COS: + case OP_SIN: + case OP_EX2: + case OP_LG2: + case OP_RCP: + case OP_RSQ: + emitMUFU(); + break; + case OP_AND: + case OP_OR: + case OP_XOR: + emitLOP(); + break; + case OP_NOT: + emitNOT(); + break; + case OP_LOAD: + switch (insn->src(0).getFile()) { + case FILE_MEMORY_CONST : emitLDC(); break; + case FILE_MEMORY_LOCAL : emitLDL(); break; + case FILE_MEMORY_SHARED: emitLDS(); break; + case FILE_MEMORY_GLOBAL: emitLD(); break; + default: + assert(!"invalid load"); + emitNOP(); + break; + } + break; + case OP_STORE: + switch (insn->src(0).getFile()) { + case FILE_MEMORY_LOCAL : emitSTL(); break; + case FILE_MEMORY_SHARED: emitSTS(); 
break; + case FILE_MEMORY_GLOBAL: emitST(); break; + default: + assert(!"invalid load"); + emitNOP(); + break; + } + break; + case OP_VFETCH: + emitALD(); + break; + case OP_EXPORT: + emitAST(); + break; + case OP_PFETCH: + emitISBERD(); + break; + case OP_LINTERP: + case OP_PINTERP: + emitIPA(); + break; + case OP_PIXLD: + emitPIXLD(); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + emitTEX(); + break; + case OP_TXF: + emitTLD(); + break; + case OP_TXG: + emitTLD4(); + break; + case OP_TXD: + emitTXD(); + break; + case OP_TXQ: + emitTXQ(); + break; + case OP_TXLQ: + emitTMML(); + break; + case OP_TEXBAR: + emitDEPBAR(); + break; + case OP_QUADOP: + emitFSWZADD(); + break; + case OP_NOP: + emitNOP(); + break; + case OP_DISCARD: + emitKIL(); + break; + case OP_EMIT: + case OP_RESTART: + emitOUT(); + break; + default: + assert(!"invalid opcode"); + emitNOP(); + ret = false; + break; + } + + if (insn->join) { + /*XXX*/ + } + + code += 2; + codeSize += 8; + return ret; +} + +uint32_t +CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const +{ + return 8; +} + +/******************************************************************************* + * sched data calculator + ******************************************************************************/ + +class SchedDataCalculatorGM107 : public Pass +{ +public: + SchedDataCalculatorGM107(const Target *targ) : targ(targ) {} +private: + const Target *targ; + bool visit(BasicBlock *bb); +}; + +bool +SchedDataCalculatorGM107::visit(BasicBlock *bb) +{ + for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) { + /*XXX*/ + insn->sched = 0x7e0; + } + + return true; +} + +/******************************************************************************* + * main + ******************************************************************************/ + +void +CodeEmitterGM107::prepareEmission(Function *func) +{ + SchedDataCalculatorGM107 sched(targ); + CodeEmitter::prepareEmission(func); + sched.run(func, true, 
true); +} + +static inline uint32_t sizeToBundlesGM107(uint32_t size) +{ + return (size + 23) / 24; +} + +void +CodeEmitterGM107::prepareEmission(Program *prog) +{ + for (ArrayList::Iterator fi = prog->allFuncs.iterator(); + !fi.end(); fi.next()) { + Function *func = reinterpret_cast<Function *>(fi.get()); + func->binPos = prog->binSize; + prepareEmission(func); + + // adjust sizes & positions for schedulding info: + if (prog->getTarget()->hasSWSched) { + uint32_t adjPos = func->binPos; + BasicBlock *bb = NULL; + for (int i = 0; i < func->bbCount; ++i) { + bb = func->bbArray[i]; + int32_t adjSize = bb->binSize; + if (adjPos % 32) { + adjSize -= 32 - adjPos % 32; + if (adjSize < 0) + adjSize = 0; + } + adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8; + bb->binPos = adjPos; + bb->binSize = adjSize; + adjPos += adjSize; + } + if (bb) + func->binSize = adjPos - func->binPos; + } + + prog->binSize += func->binSize; + } +} + +CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target) + : CodeEmitter(target), + targGM107(target), + writeIssueDelays(target->hasSWSched) +{ + code = NULL; + codeSize = codeSizeLimit = 0; + relocInfo = NULL; +} + +CodeEmitter * +TargetGM107::createCodeEmitterGM107(Program::Type type) +{ + CodeEmitterGM107 *emit = new CodeEmitterGM107(this); + emit->setProgramType(type); + return emit; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp new file mode 100644 index 00000000000..070b20a2133 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -0,0 +1,273 @@ +/* + * Copyright 2011 Christoph Bumiller + * 2014 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_build_util.h" + +#include "codegen/nv50_ir_target_nvc0.h" +#include "codegen/nv50_ir_lowering_gm107.h" + +#include <limits> + +namespace nv50_ir { + +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV2 3 + +// UL UR LL LR +#define QUADOP(q, r, s, t) \ + ((QOP_##q << 6) | (QOP_##r << 4) | \ + (QOP_##s << 2) | (QOP_##t << 0)) + +bool +GM107LoweringPass::handleManualTXD(TexInstruction *i) +{ + static const uint8_t qOps[4][2] = + { + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 + { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 + { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 + }; + Value *def[4][4]; + Value *crd[3]; + Value *tmp; + Instruction *tex, *add; + Value *zero = bld.loadImm(bld.getSSA(), 0); + int l, c; + const int dim = i->tex.target.getDim(); + + i->op = OP_TEX; // no need to clone dPdx/dPdy later + + for (c = 0; c < dim; ++c) + crd[c] = bld.getScratch(); + tmp = bld.getScratch(); + + for (l = 0; l < 4; ++l) { + // mov coordinates from lane l to all lanes + bld.mkOp(OP_QUADON, TYPE_NONE, NULL); + for (c = 0; c < dim; ++c) { + bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c), bld.mkImm(l)); + add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); + add->subOp = 0x00; + add->lanes = 1; /* abused for .ndv */ + } + + // add dPdx from lane l to lanes dx + for (c = 0; c < dim; ++c) { + bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l)); + add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); + add->subOp = qOps[l][0]; + add->lanes = 1; /* abused for .ndv */ + } + + // add dPdy from lane l to lanes dy + for (c = 0; c < dim; ++c) { + bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l)); + add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); + add->subOp = qOps[l][1]; + add->lanes = 1; /* 
abused for .ndv */ + } + + // texture + bld.insert(tex = cloneForward(func, i)); + for (c = 0; c < dim; ++c) + tex->setSrc(c, crd[c]); + bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); + + // save results + for (c = 0; i->defExists(c); ++c) { + Instruction *mov; + def[c][l] = bld.getSSA(); + mov = bld.mkMov(def[c][l], tex->getDef(c)); + mov->fixed = 1; + mov->lanes = 1 << l; + } + } + + for (c = 0; i->defExists(c); ++c) { + Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); + for (l = 0; l < 4; ++l) + u->setSrc(l, def[c][l]); + } + + i->bb->remove(i); + return true; +} + +bool +GM107LoweringPass::handleDFDX(Instruction *insn) +{ + Instruction *shfl; + int qop = 0, xid = 0; + + switch (insn->op) { + case OP_DFDX: + qop = QUADOP(SUB, SUBR, SUB, SUBR); + xid = 1; + break; + case OP_DFDY: + qop = QUADOP(SUB, SUB, SUBR, SUBR); + xid = 2; + break; + default: + assert(!"invalid dfdx opcode"); + break; + } + + shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(), + insn->getSrc(0), bld.mkImm(xid)); + shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; + insn->op = OP_QUADOP; + insn->subOp = qop; + insn->lanes = 0; /* abused for !.ndv */ + insn->setSrc(1, insn->getSrc(0)); + insn->setSrc(0, shfl->getDef(0)); + return true; +} + +bool +GM107LoweringPass::handlePFETCH(Instruction *i) +{ + Value *tmp0 = bld.getScratch(); + Value *tmp1 = bld.getScratch(); + Value *tmp2 = bld.getScratch(); + bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0)); + bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); + bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); + bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); + bld.mkOp1(OP_MOV , TYPE_U32, tmp2, bld.mkImm(i->getSrc(0)->reg.data.u32)); + bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2); + i->setSrc(0, tmp0); + i->setSrc(1, NULL); + return true; +} + +bool +GM107LoweringPass::handlePOPCNT(Instruction *i) +{ + Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(), + i->getSrc(0), i->getSrc(1)); + 
i->setSrc(0, tmp); + i->setSrc(1, NULL); + return TRUE; +} + +// +// - add quadop dance for texturing +// - put FP outputs in GPRs +// - convert instruction sequences +// +bool +GM107LoweringPass::visit(Instruction *i) +{ + bld.setPosition(i, false); + + if (i->cc != CC_ALWAYS) + checkPredicate(i); + + switch (i->op) { + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXF: + case OP_TXG: + return handleTEX(i->asTex()); + case OP_TXD: + return handleTXD(i->asTex()); + case OP_TXLQ: + return handleTXLQ(i->asTex()); + case OP_TXQ: + return handleTXQ(i->asTex()); + case OP_EX2: + bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); + i->setSrc(0, i->getDef(0)); + break; + case OP_POW: + return handlePOW(i); + case OP_DIV: + return handleDIV(i); + case OP_MOD: + return handleMOD(i); + case OP_SQRT: + return handleSQRT(i); + case OP_EXPORT: + return handleEXPORT(i); + case OP_PFETCH: + return handlePFETCH(i); + case OP_EMIT: + case OP_RESTART: + return handleOUT(i); + case OP_RDSV: + return handleRDSV(i); + case OP_WRSV: + return handleWRSV(i); + case OP_LOAD: + if (i->src(0).getFile() == FILE_SHADER_INPUT) { + if (prog->getType() == Program::TYPE_COMPUTE) { + i->getSrc(0)->reg.file = FILE_MEMORY_CONST; + i->getSrc(0)->reg.fileIndex = 0; + } else + if (prog->getType() == Program::TYPE_GEOMETRY && + i->src(0).isIndirect(0)) { + // XXX: this assumes vec4 units + Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), + i->getIndirect(0, 0), bld.mkImm(4)); + i->setIndirect(0, 0, ptr); + } else { + i->op = OP_VFETCH; + assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP + } + } + break; + case OP_ATOM: + { + const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; + handleATOM(i); + handleCasExch(i, cctl); + } + break; + case OP_SULDB: + case OP_SULDP: + case OP_SUSTB: + case OP_SUSTP: + case OP_SUREDB: + case OP_SUREDP: + handleSurfaceOpNVE4(i->asTex()); + break; + case OP_DFDX: + case OP_DFDY: + handleDFDX(i); + break; + case OP_POPCNT: + 
handlePOPCNT(i); + break; + default: + break; + } + return true; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h new file mode 100644 index 00000000000..036abf055ed --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h @@ -0,0 +1,18 @@ +#include "codegen/nv50_ir_lowering_nvc0.h" + +namespace nv50_ir { + +class GM107LoweringPass : public NVC0LoweringPass +{ +public: + GM107LoweringPass(Program *p) : NVC0LoweringPass(p) {} +private: + virtual bool visit(Instruction *); + + virtual bool handleManualTXD(TexInstruction *); + bool handleDFDX(Instruction *); + bool handlePFETCH(Instruction *); + bool handlePOPCNT(Instruction *); +}; + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index b68c2d09146..7f39c289554 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -99,7 +99,7 @@ protected: bool handleTEX(TexInstruction *); bool handleTXD(TexInstruction *); bool handleTXQ(TexInstruction *); - bool handleManualTXD(TexInstruction *); + virtual bool handleManualTXD(TexInstruction *); bool handleTXLQ(TexInstruction *); bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index f788c72669b..ef3de6ff92a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -187,6 +187,7 @@ const char *operationStr[OP_LAST + 1] = "vshl", "vsel", "cctl", + "shfl", "(invalid)" }; @@ -271,6 +272,7 @@ static const char *SemanticStr[SV_LAST + 1] = "LBASE", "SBASE", "VERTEX_STRIDE", + "INVOCATION_INFO", "?", "(INVALID)" }; diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 60a6a3f486b..cbf0dd2119d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -256,6 +256,7 @@ private: void texConstraintNV50(TexInstruction *); void texConstraintNVC0(TexInstruction *); void texConstraintNVE0(TexInstruction *); + void texConstraintGM107(TexInstruction *); std::list<Instruction *> constrList; @@ -855,6 +856,7 @@ GCRA::coalesce(ArrayList& insns) case 0xe0: case 0xf0: case 0x100: + case 0x110: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -1881,6 +1883,34 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, } void +RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) +{ + int n, s; + + if (isTextureOp(tex->op)) + textureMask(tex); + condenseDefs(tex); + + if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { + condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + } else + if (isTextureOp(tex->op)) { + if (tex->op != OP_TXQ) { + s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); + n = tex->srcCount(0xff) - s; + } else { + s = tex->srcCount(0xff); + n = 0; + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) // NOTE: first call modified positions already + condenseSrcs(tex, 1, n); + } +} + +void RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) { if (isTextureOp(tex->op)) @@ -1987,6 +2017,9 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) case 0x100: texConstraintNVE0(tex); break; + case 0x110: + texConstraintGM107(tex); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 4ca5687765c..0397bdcad55 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -54,6 +54,7 @@ const uint8_t 
Target::operationSrcNr[] = 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL + 3, // SHFL 0 }; @@ -126,10 +127,13 @@ const OpClass Target::operationClass[] = OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, // VSEL, CCTL OPCLASS_VECTOR, OPCLASS_CONTROL, + // SHFL + OPCLASS_OTHER, OPCLASS_PSEUDO // LAST }; +extern Target *getTargetGM107(unsigned int chipset); extern Target *getTargetNVC0(unsigned int chipset); extern Target *getTargetNV50(unsigned int chipset); @@ -138,6 +142,8 @@ Target *Target::create(unsigned int chipset) STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1); STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1); switch (chipset & ~0xf) { + case 0x110: + return getTargetGM107(chipset); case 0xc0: case 0xd0: case 0xe0: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 711056e5961..cb9fd37c4c8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -78,7 +78,7 @@ public: inline void *getRelocInfo() const { return relocInfo; } - void prepareEmission(Program *); + virtual void prepareEmission(Program *); virtual void prepareEmission(Function *); virtual void prepareEmission(BasicBlock *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp new file mode 100644 index 00000000000..202d7443588 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp @@ -0,0 +1,100 @@ +/* + * Copyright 2011 Christoph Bumiller + * 2014 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir_target_gm107.h" +#include "codegen/nv50_ir_lowering_gm107.h" + +namespace nv50_ir { + +Target *getTargetGM107(unsigned int chipset) +{ + return new TargetGM107(chipset); +} + +// BULTINS / LIBRARY FUNCTIONS: + +// lazyness -> will just hardcode everything for the time being + +#include "lib/gm107.asm.h" + +void +TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const +{ + *code = (const uint32_t *)&gm107_builtin_code[0]; + *size = sizeof(gm107_builtin_code); +} + +uint32_t +TargetGM107::getBuiltinOffset(int builtin) const +{ + assert(builtin < NVC0_BUILTIN_COUNT); + return gm107_builtin_offsets[builtin]; +} + +bool +TargetGM107::isOpSupported(operation op, DataType ty) const +{ + switch (op) { + case OP_MAD: + case OP_FMA: + if (ty != TYPE_F32) + return false; + break; + case OP_SAD: + case OP_POW: + case OP_SQRT: + case OP_DIV: + case OP_MOD: + return false; + default: + break; + } + + return true; +} + +bool +TargetGM107::runLegalizePass(Program *prog, CGStage stage) const +{ + if (stage == CG_STAGE_PRE_SSA) { + GM107LoweringPass pass(prog); + return pass.run(prog, false, true); + } else + if (stage == CG_STAGE_POST_RA) { + NVC0LegalizePostRA pass(prog); + return pass.run(prog, false, true); + } else + if (stage == CG_STAGE_SSA) { + NVC0LegalizeSSA pass; + return pass.run(prog, false, true); + } + return false; +} + +CodeEmitter * +TargetGM107::getCodeEmitter(Program::Type type) +{ + return createCodeEmitterGM107(type); +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h new file mode 100644 index 00000000000..5d606378953 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h @@ -0,0 +1,21 @@ +#include "codegen/nv50_ir_target_nvc0.h" + +namespace nv50_ir { + +class TargetGM107 : public TargetNVC0 +{ +public: + TargetGM107(unsigned int chipset) : TargetNVC0(chipset) {} + + virtual 
CodeEmitter *getCodeEmitter(Program::Type); + CodeEmitter *createCodeEmitterGM107(Program::Type); + + virtual bool runLegalizePass(Program *, CGStage) const; + + virtual void getBuiltinCode(const uint32_t **, uint32_t *) const; + virtual uint32_t getBuiltinOffset(int) const; + + virtual bool isOpSupported(operation, DataType) const; +}; + +} // namespace nv50_ir |