diff options
author | Christoph Bumiller <[email protected]> | 2011-10-18 18:36:57 +0200 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2011-10-21 23:00:40 +0200 |
commit | bb0482a55bbebfdd4b06954fbfacf5a57a04b1f3 (patch) | |
tree | b6bdfb9091559354cffbcaba21cc7ab62d1cc44a /src/gallium/drivers | |
parent | d988361ead27ce61615669bd428b04d2aac7af4f (diff) |
nv50/ir: import SM4 converter
Diffstat (limited to 'src/gallium/drivers')
4 files changed, 2513 insertions, 4 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index f9c18fa8d88..53a73b457f4 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -207,7 +207,7 @@ BuildUtil::mkCvt(operation op, return insn; } -Instruction * +CmpInstruction * BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst, Value *src0, Value *src1, Value *src2) { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index dbc6723fe90..0bb853a340b 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -68,9 +68,9 @@ public: Value *attrRel, Value *primRel); Instruction *mkCvt(operation, DataType, Value *, DataType, Value *); - Instruction *mkCmp(operation, CondCode, DataType, - Value *, - Value *, Value *, Value * = NULL); + CmpInstruction *mkCmp(operation, CondCode, DataType, + Value *, + Value *, Value *, Value * = NULL); Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc, Value **def, Value **src); Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp new file mode 100644 index 00000000000..7ed00e336c6 --- /dev/null +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp @@ -0,0 +1,2326 @@ + +#include "nv50_ir.h" +#include "nv50_ir_target.h" +#include "nv50_ir_build_util.h" + +#include "nv50_ir_from_sm4.h" + +// WTF: pass-through is implicit ??? 
check ReadWriteMask + +namespace tgsi { + +static nv50_ir::SVSemantic irSemantic(unsigned sn) +{ + switch (sn) { + case TGSI_SEMANTIC_POSITION: return nv50_ir::SV_POSITION; + case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; + case NV50_SEMANTIC_LAYER: return nv50_ir::SV_LAYER; + case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX; + case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; + case NV50_SEMANTIC_CLIPDISTANCE: return nv50_ir::SV_CLIP_DISTANCE; + case NV50_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; + case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; + case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; + case NV50_SEMANTIC_TESSFACTOR: return nv50_ir::SV_TESS_FACTOR; + case NV50_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD; + default: + return nv50_ir::SV_UNDEFINED; + } +} + +} // namespace tgsi + +namespace { + +using namespace nv50_ir; + +#define NV50_IR_MAX_RESOURCES 64 + +class Converter : public BuildUtil +{ +public: + Converter(Program *, struct nv50_ir_prog_info *); + ~Converter(); + +private: + DataArray tData32; + DataArray tData64; + unsigned int nrRegVals; + + DataArray *lData; + unsigned int nrArrays; + unsigned int arrayVol; + + DataArray oData; + + uint8_t interpMode[PIPE_MAX_SHADER_INPUTS]; + + // outputs for each phase + struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS]; + + int phase; + int subPhaseCnt[2]; + int subPhase; + unsigned int phaseStart; + unsigned int phaseInstance; + unsigned int *phaseInstCnt[2]; + bool unrollPhase; + bool phaseInstanceUsed; + int phaseEnded; // (phase + 1) if $phase ended + + bool finalized; + + Value *srcPtr[3][3]; // for indirect addressing, save pointer values + Value *dstPtr[3]; + Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP) + + Value *domainPt[3]; // pre-fetched TessCoord + + unsigned int nDstOpnds; + + Stack condBBs; + Stack joinBBs; + Stack loopBBs; + Stack breakBBs; + Stack entryBBs; + Stack leaveBBs; + Stack retIPs; + + 
bool shadow[NV50_IR_MAX_RESOURCES]; + TexTarget resourceType[NV50_IR_MAX_RESOURCES][2]; + + struct nv50_ir_prog_info& info; + + Value *fragCoord[4]; + +public: + bool run(); + +private: + bool handleInstruction(unsigned int pos); + bool inspectInstruction(unsigned int pos); + bool handleDeclaration(const sm4_dcl& dcl); + bool inspectDeclaration(const sm4_dcl& dcl); + bool parseSignature(); + + bool haveNextPhase(unsigned int pos) const; + + void allocateValues(); + void exportOutputs(); + + void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]); + void handleLOAD(Value *dst0[4]); + void handleSAMPLE(operation, Value *dst0[4]); + void handleQUERY(Value *dst0[4], enum TexQuery query); + void handleDP(Value *dst0[4], int dim); + + Symbol *iSym(int i, int c); + Symbol *oSym(int i, int c); + + Value *src(int i, int c); + Value *src(const sm4_op&, int c, int i); + Value *dst(int i, int c); + Value *dst(const sm4_op&, int c, int i); + void saveDst(int i, int c, Value *value); + void saveDst(const sm4_op&, int c, Value *value, int i); + void saveFragDepth(operation op, Value *value); + + Value *interpolate(const sm4_op&, int c, int i); + + Value *getSrcPtr(int s, int dim, int shl); + Value *getDstPtr(int d, int dim, int shl); + Value *getVtxPtr(int s); + + bool checkDstSrcAliasing() const; + void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); + void finalizeShader(); + + operation cvtOpcode(enum sm4_opcode op) const; + unsigned int getDstOpndCount(enum sm4_opcode opcode) const; + + DataType inferSrcType(enum sm4_opcode op) const; + DataType inferDstType(enum sm4_opcode op) const; + + unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const; + CondCode cvtCondCode(enum sm4_opcode op) const; + RoundMode cvtRoundingMode(enum sm4_opcode op) const; + TexTarget cvtTexTarget(enum sm4_target, + enum sm4_opcode, operation *) const; + SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const; + uint8_t cvtInterpMode(enum sm4_interpolation) 
const; + + unsigned tgsiSemantic(SVSemantic, int index); + void recordSV(unsigned sn, unsigned si, unsigned mask, bool input); + +private: + sm4_insn *insn; + DataType dTy, sTy; + + const struct sm4_program& sm4; + Program *prog; +}; + +#define PRIM_CASE(a, b) \ + case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b; + +/* + * Translate a D3D_PRIMITIVE_TOPOLOGY_* value into a PIPE_PRIM_* value. + * Strip topologies map to the strip primitives, including the + * triangle strip with adjacency (which must not collapse to the list form). + * Values inside the 1..32 control point patchlist range return + * NV50_PRIM_PATCHES and, when patchSize is non-NULL, store + * (prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1) in *patchSize. + * Any other unlisted value falls back to PIPE_PRIM_POINTS. + */ +unsigned +Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const +{ + switch (prim) { + PRIM_CASE(UNDEFINED, POINTS); + PRIM_CASE(POINTLIST, POINTS); + PRIM_CASE(LINELIST, LINES); + PRIM_CASE(LINESTRIP, LINE_STRIP); + PRIM_CASE(TRIANGLELIST, TRIANGLES); + PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP); + PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY); + PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY); + PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY); + PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLE_STRIP_ADJACENCY); + default: + if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST || + prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST) + return PIPE_PRIM_POINTS; + if (patchSize) + *patchSize = + prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1; + return NV50_PRIM_PATCHES; + } +} + +#define IPM_CASE(n, a, b) \ + case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b + +uint8_t +Converter::cvtInterpMode(enum sm4_interpolation mode) const +{ + switch (mode) { + IPM_CASE(CONSTANT, FLAT, FLAT); + IPM_CASE(LINEAR, PERSPECTIVE, PERSPECTIVE); + IPM_CASE(LINEAR_CENTROID, PERSPECTIVE, CENTROID); + IPM_CASE(LINEAR_NOPERSPECTIVE, LINEAR, LINEAR); + IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID); + IPM_CASE(LINEAR_SAMPLE, PERSPECTIVE, OFFSET); + IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE, LINEAR, OFFSET); + IPM_CASE(UNDEFINED, LINEAR, LINEAR); + default: + assert(!"invalid interpolation mode"); + return 0; + } +} + +static void +setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode) +{ + switch (mode & NV50_IR_INTERP_MODE_MASK) { + case NV50_IR_INTERP_LINEAR: + var->linear = 1; + break; + case NV50_IR_INTERP_FLAT: + 
var->flat = 1; + break; + default: + break; + } + if (mode & NV50_IR_INTERP_CENTROID) + var->centroid = 1; +} + +RoundMode +Converter::cvtRoundingMode(enum sm4_opcode op) const +{ + switch (op) { + case SM4_OPCODE_ROUND_NE: return ROUND_NI; + case SM4_OPCODE_ROUND_NI: return ROUND_MI; + case SM4_OPCODE_ROUND_PI: return ROUND_PI; + case SM4_OPCODE_ROUND_Z: return ROUND_ZI; + default: + return ROUND_N; + } +} + +CondCode +Converter::cvtCondCode(enum sm4_opcode op) const +{ + switch (op) { + case SM4_OPCODE_EQ: + case SM4_OPCODE_DEQ: + case SM4_OPCODE_IEQ: return CC_EQ; + case SM4_OPCODE_GE: + case SM4_OPCODE_DGE: + case SM4_OPCODE_IGE: + case SM4_OPCODE_UGE: return CC_GE; + case SM4_OPCODE_LT: + case SM4_OPCODE_DLT: + case SM4_OPCODE_ILT: + case SM4_OPCODE_ULT: return CC_LT; + case SM4_OPCODE_NE: + case SM4_OPCODE_INE: + case SM4_OPCODE_DNE: return CC_NEU; + default: + return CC_ALWAYS; + } +} + +DataType +Converter::inferSrcType(enum sm4_opcode op) const +{ + switch (op) { + case SM4_OPCODE_IADD: + case SM4_OPCODE_IEQ: + case SM4_OPCODE_IGE: + case SM4_OPCODE_ILT: + case SM4_OPCODE_IMAD: + case SM4_OPCODE_IMAX: + case SM4_OPCODE_IMIN: + case SM4_OPCODE_IMUL: + case SM4_OPCODE_INE: + case SM4_OPCODE_INEG: + case SM4_OPCODE_ISHL: + case SM4_OPCODE_ISHR: + case SM4_OPCODE_ITOF: + case SM4_OPCODE_ATOMIC_IADD: + case SM4_OPCODE_ATOMIC_IMAX: + case SM4_OPCODE_ATOMIC_IMIN: + return TYPE_S32; + case SM4_OPCODE_AND: + case SM4_OPCODE_NOT: + case SM4_OPCODE_OR: + case SM4_OPCODE_UDIV: + case SM4_OPCODE_ULT: + case SM4_OPCODE_UGE: + case SM4_OPCODE_UMUL: + case SM4_OPCODE_UMAD: + case SM4_OPCODE_UMAX: + case SM4_OPCODE_UMIN: + case SM4_OPCODE_USHR: + case SM4_OPCODE_UTOF: + case SM4_OPCODE_XOR: + case SM4_OPCODE_UADDC: + case SM4_OPCODE_USUBB: + case SM4_OPCODE_ATOMIC_AND: + case SM4_OPCODE_ATOMIC_OR: + case SM4_OPCODE_ATOMIC_XOR: + case SM4_OPCODE_ATOMIC_UMAX: + case SM4_OPCODE_ATOMIC_UMIN: + return TYPE_U32; + case SM4_OPCODE_DADD: + case SM4_OPCODE_DMAX: + case 
SM4_OPCODE_DMIN: + case SM4_OPCODE_DMUL: + case SM4_OPCODE_DEQ: + case SM4_OPCODE_DGE: + case SM4_OPCODE_DLT: + case SM4_OPCODE_DNE: + case SM4_OPCODE_DMOV: + case SM4_OPCODE_DMOVC: + case SM4_OPCODE_DTOF: + return TYPE_F64; + case SM4_OPCODE_F16TOF32: + return TYPE_F16; + default: + return TYPE_F32; + } +} + +DataType +Converter::inferDstType(enum sm4_opcode op) const +{ + switch (op) { + case SM4_OPCODE_FTOI: + return TYPE_S32; + case SM4_OPCODE_FTOU: + case SM4_OPCODE_EQ: + case SM4_OPCODE_GE: + case SM4_OPCODE_LT: + case SM4_OPCODE_NE: + return TYPE_U32; + case SM4_OPCODE_FTOD: + return TYPE_F64; + case SM4_OPCODE_F32TOF16: + return TYPE_F16; + case SM4_OPCODE_ITOF: + case SM4_OPCODE_UTOF: + case SM4_OPCODE_DTOF: + return TYPE_F32; + default: + return inferSrcType(op); + } +} + +operation +Converter::cvtOpcode(enum sm4_opcode op) const +{ + switch (op) { + case SM4_OPCODE_ADD: return OP_ADD; + case SM4_OPCODE_AND: return OP_AND; + case SM4_OPCODE_BREAK: return OP_BREAK; + case SM4_OPCODE_BREAKC: return OP_BREAK; + case SM4_OPCODE_CALL: return OP_CALL; + case SM4_OPCODE_CALLC: return OP_CALL; + case SM4_OPCODE_CASE: return OP_NOP; + case SM4_OPCODE_CONTINUE: return OP_CONT; + case SM4_OPCODE_CONTINUEC: return OP_CONT; + case SM4_OPCODE_CUT: return OP_RESTART; + case SM4_OPCODE_DEFAULT: return OP_NOP; + case SM4_OPCODE_DERIV_RTX: return OP_DFDX; + case SM4_OPCODE_DERIV_RTY: return OP_DFDY; + case SM4_OPCODE_DISCARD: return OP_DISCARD; + case SM4_OPCODE_DIV: return OP_DIV; + case SM4_OPCODE_DP2: return OP_MAD; + case SM4_OPCODE_DP3: return OP_MAD; + case SM4_OPCODE_DP4: return OP_MAD; + case SM4_OPCODE_ELSE: return OP_BRA; + case SM4_OPCODE_EMIT: return OP_EMIT; + case SM4_OPCODE_EMITTHENCUT: return OP_EMIT; + case SM4_OPCODE_ENDIF: return OP_BRA; + case SM4_OPCODE_ENDLOOP: return OP_PREBREAK; + case SM4_OPCODE_ENDSWITCH: return OP_NOP; + case SM4_OPCODE_EQ: return OP_SET; + case SM4_OPCODE_EXP: return OP_EX2; + case SM4_OPCODE_FRC: return OP_CVT; + case 
SM4_OPCODE_FTOI: return OP_CVT; + case SM4_OPCODE_FTOU: return OP_CVT; + case SM4_OPCODE_GE: return OP_SET; + case SM4_OPCODE_IADD: return OP_ADD; + case SM4_OPCODE_IF: return OP_BRA; + case SM4_OPCODE_IEQ: return OP_SET; + case SM4_OPCODE_IGE: return OP_SET; + case SM4_OPCODE_ILT: return OP_SET; + case SM4_OPCODE_IMAD: return OP_MAD; + case SM4_OPCODE_IMAX: return OP_MAX; + case SM4_OPCODE_IMIN: return OP_MIN; + case SM4_OPCODE_IMUL: return OP_MUL; + case SM4_OPCODE_INE: return OP_SET; + case SM4_OPCODE_INEG: return OP_NEG; + case SM4_OPCODE_ISHL: return OP_SHL; + case SM4_OPCODE_ISHR: return OP_SHR; + case SM4_OPCODE_ITOF: return OP_CVT; + case SM4_OPCODE_LD: return OP_TXF; + case SM4_OPCODE_LD_MS: return OP_TXF; + case SM4_OPCODE_LOG: return OP_LG2; + case SM4_OPCODE_LOOP: return OP_PRECONT; + case SM4_OPCODE_LT: return OP_SET; + case SM4_OPCODE_MAD: return OP_MAD; + case SM4_OPCODE_MIN: return OP_MIN; + case SM4_OPCODE_MAX: return OP_MAX; + case SM4_OPCODE_MOV: return OP_MOV; + case SM4_OPCODE_MOVC: return OP_MOV; + case SM4_OPCODE_MUL: return OP_MUL; + case SM4_OPCODE_NE: return OP_SET; + case SM4_OPCODE_NOP: return OP_NOP; + case SM4_OPCODE_NOT: return OP_NOT; + case SM4_OPCODE_OR: return OP_OR; + case SM4_OPCODE_RESINFO: return OP_TXQ; + case SM4_OPCODE_RET: return OP_RET; + case SM4_OPCODE_RETC: return OP_RET; + case SM4_OPCODE_ROUND_NE: return OP_CVT; + case SM4_OPCODE_ROUND_NI: return OP_FLOOR; + case SM4_OPCODE_ROUND_PI: return OP_CEIL; + case SM4_OPCODE_ROUND_Z: return OP_TRUNC; + case SM4_OPCODE_RSQ: return OP_RSQ; + case SM4_OPCODE_SAMPLE: return OP_TEX; + case SM4_OPCODE_SAMPLE_C: return OP_TEX; + case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX; + case SM4_OPCODE_SAMPLE_L: return OP_TXL; + case SM4_OPCODE_SAMPLE_D: return OP_TXD; + case SM4_OPCODE_SAMPLE_B: return OP_TXB; + case SM4_OPCODE_SQRT: return OP_SQRT; + case SM4_OPCODE_SWITCH: return OP_NOP; + case SM4_OPCODE_SINCOS: return OP_PRESIN; + case SM4_OPCODE_UDIV: return OP_DIV; + case SM4_OPCODE_ULT: 
return OP_SET; + case SM4_OPCODE_UGE: return OP_SET; + case SM4_OPCODE_UMUL: return OP_MUL; + case SM4_OPCODE_UMAD: return OP_MAD; + case SM4_OPCODE_UMAX: return OP_MAX; + case SM4_OPCODE_UMIN: return OP_MIN; + case SM4_OPCODE_USHR: return OP_SHR; + case SM4_OPCODE_UTOF: return OP_CVT; + case SM4_OPCODE_XOR: return OP_XOR; + + case SM4_OPCODE_GATHER4: return OP_TXG; + case SM4_OPCODE_SAMPLE_POS: return OP_PIXLD; + case SM4_OPCODE_SAMPLE_INFO: return OP_PIXLD; + case SM4_OPCODE_EMIT_STREAM: return OP_EMIT; + case SM4_OPCODE_CUT_STREAM: return OP_RESTART; + case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT; + case SM4_OPCODE_INTERFACE_CALL: return OP_CALL; + case SM4_OPCODE_BUFINFO: return OP_TXQ; + case SM4_OPCODE_DERIV_RTX_COARSE: return OP_DFDX; + case SM4_OPCODE_DERIV_RTX_FINE: return OP_DFDX; + case SM4_OPCODE_DERIV_RTY_COARSE: return OP_DFDY; + case SM4_OPCODE_DERIV_RTY_FINE: return OP_DFDY; + case SM4_OPCODE_GATHER4_C: return OP_TXG; + case SM4_OPCODE_GATHER4_PO: return OP_TXG; + case SM4_OPCODE_GATHER4_PO_C: return OP_TXG; + + case SM4_OPCODE_RCP: return OP_RCP; + case SM4_OPCODE_F32TOF16: return OP_CVT; + case SM4_OPCODE_F16TOF32: return OP_CVT; + case SM4_OPCODE_UADDC: return OP_ADD; + case SM4_OPCODE_USUBB: return OP_SUB; + case SM4_OPCODE_COUNTBITS: return OP_POPCNT; + + case SM4_OPCODE_ATOMIC_AND: return OP_AND; + case SM4_OPCODE_ATOMIC_OR: return OP_OR; + case SM4_OPCODE_ATOMIC_XOR: return OP_XOR; + case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE; + case SM4_OPCODE_ATOMIC_IADD: return OP_ADD; + case SM4_OPCODE_ATOMIC_IMAX: return OP_MAX; + case SM4_OPCODE_ATOMIC_IMIN: return OP_MIN; + case SM4_OPCODE_ATOMIC_UMAX: return OP_MAX; + case SM4_OPCODE_ATOMIC_UMIN: return OP_MIN; + + case SM4_OPCODE_SYNC: return OP_MEMBAR; + case SM4_OPCODE_DADD: return OP_ADD; + case SM4_OPCODE_DMAX: return OP_MAX; + case SM4_OPCODE_DMIN: return OP_MIN; + case SM4_OPCODE_DMUL: return OP_MUL; + case SM4_OPCODE_DEQ: return OP_SET; + case SM4_OPCODE_DGE: return OP_SET; + case 
SM4_OPCODE_DLT: return OP_SET; + case SM4_OPCODE_DNE: return OP_SET; + case SM4_OPCODE_DMOV: return OP_MOV; + case SM4_OPCODE_DMOVC: return OP_MOV; + case SM4_OPCODE_DTOF: return OP_CVT; + case SM4_OPCODE_FTOD: return OP_CVT; + + default: + return OP_NOP; + } +} + +unsigned int +Converter::getDstOpndCount(enum sm4_opcode opcode) const +{ + switch (opcode) { + case SM4_OPCODE_SINCOS: + case SM4_OPCODE_UDIV: + case SM4_OPCODE_IMUL: + case SM4_OPCODE_UMUL: + return 2; + case SM4_OPCODE_BREAK: + case SM4_OPCODE_BREAKC: + case SM4_OPCODE_CALL: + case SM4_OPCODE_CALLC: + case SM4_OPCODE_CONTINUE: + case SM4_OPCODE_CONTINUEC: + case SM4_OPCODE_DISCARD: + case SM4_OPCODE_EMIT: + case SM4_OPCODE_EMIT_STREAM: + case SM4_OPCODE_CUT: + case SM4_OPCODE_CUT_STREAM: + case SM4_OPCODE_EMITTHENCUT: + case SM4_OPCODE_EMITTHENCUT_STREAM: + case SM4_OPCODE_IF: + case SM4_OPCODE_ELSE: + case SM4_OPCODE_ENDIF: + case SM4_OPCODE_LOOP: + case SM4_OPCODE_ENDLOOP: + case SM4_OPCODE_RET: + case SM4_OPCODE_RETC: + case SM4_OPCODE_SYNC: + case SM4_OPCODE_SWITCH: + case SM4_OPCODE_CASE: + case SM4_OPCODE_HS_DECLS: + case SM4_OPCODE_HS_CONTROL_POINT_PHASE: + case SM4_OPCODE_HS_FORK_PHASE: + case SM4_OPCODE_HS_JOIN_PHASE: + return 0; + default: + return 1; + } +} + +#define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b; +#define TARG_CASE_2(a, b) case SM4_TARGET_##a: \ + return dc ? 
TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b + +TexTarget +Converter::cvtTexTarget(enum sm4_target targ, + enum sm4_opcode op, operation *opr) const +{ + bool dc = (op == SM4_OPCODE_SAMPLE_C || + op == SM4_OPCODE_SAMPLE_C_LZ || + op == SM4_OPCODE_GATHER4_C || + op == SM4_OPCODE_GATHER4_PO_C); + + if (opr) { + switch (targ) { + case SM4_TARGET_RAW_BUFFER: *opr = OP_LOAD; break; + case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break; + default: + *opr = OP_TEX; + break; + } + } + + switch (targ) { + TARG_CASE_1(UNKNOWN, 2D); + TARG_CASE_2(TEXTURE1D, 1D); + TARG_CASE_2(TEXTURE2D, 2D); + TARG_CASE_1(TEXTURE2DMS, 2D_MS); + TARG_CASE_1(TEXTURE3D, 3D); + TARG_CASE_2(TEXTURECUBE, CUBE); + TARG_CASE_2(TEXTURE1DARRAY, 1D_ARRAY); + TARG_CASE_2(TEXTURE2DARRAY, 2D_ARRAY); + TARG_CASE_1(TEXTURE2DMSARRAY, 2D_MS_ARRAY); + TARG_CASE_2(TEXTURECUBEARRAY, CUBE_ARRAY); + TARG_CASE_1(BUFFER, BUFFER); + TARG_CASE_1(RAW_BUFFER, BUFFER); + TARG_CASE_1(STRUCTURED_BUFFER, BUFFER); + default: + assert(!"invalid SM4 texture target"); + return dc ? 
TEX_TARGET_2D_SHADOW : TEX_TARGET_2D; + } +} + +static inline uint32_t +getSVIndex(enum sm4_sv sv) +{ + switch (sv) { + case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0; + case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1; + case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2; + case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3; + + case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4; + case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5; + + case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0; + case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1; + case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2; + + case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4; + + case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0; + + case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4; + + default: + return 0; + } +} + +SVSemantic +Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const +{ + idx = 0; + + switch (sv) { + case SM4_SV_UNDEFINED: return SV_UNDEFINED; + case SM4_SV_POSITION: return SV_POSITION; + case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE; + case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction + case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER; + case SM4_SV_VIEWPORT_ARRAY_INDEX: return SV_VIEWPORT_INDEX; + case SM4_SV_VERTEX_ID: return SV_VERTEX_ID; + case SM4_SV_PRIMITIVE_ID: return SV_PRIMITIVE_ID; + case SM4_SV_INSTANCE_ID: return SV_INSTANCE_ID; + case SM4_SV_IS_FRONT_FACE: return SV_FACE; + case SM4_SV_SAMPLE_INDEX: return SV_SAMPLE_INDEX; + + case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: + case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: + case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case 
SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: + case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: + case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: + idx = getSVIndex(sv); + return SV_TESS_FACTOR; + + default: + assert(!"invalid SM4 system value"); + return SV_UNDEFINED; + } +} + +unsigned +Converter::tgsiSemantic(SVSemantic sv, int index) +{ + switch (sv) { + case SV_POSITION: return TGSI_SEMANTIC_POSITION; + case SV_FACE: return TGSI_SEMANTIC_FACE; + case SV_LAYER: return NV50_SEMANTIC_LAYER; + case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX; + case SV_POINT_SIZE: return TGSI_SEMANTIC_PSIZE; + case SV_CLIP_DISTANCE: return NV50_SEMANTIC_CLIPDISTANCE; + case SV_VERTEX_ID: return NV50_SEMANTIC_VERTEXID; + case SV_INSTANCE_ID: return TGSI_SEMANTIC_INSTANCEID; + case SV_PRIMITIVE_ID: return TGSI_SEMANTIC_PRIMID; + case SV_TESS_FACTOR: return NV50_SEMANTIC_TESSFACTOR; + case SV_TESS_COORD: return NV50_SEMANTIC_TESSCOORD; + case SV_INVOCATION_ID: return NV50_SEMANTIC_INVOCATIONID; + default: + return TGSI_SEMANTIC_GENERIC; + } +} + +void +Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input) +{ + unsigned int i; + for (i = 0; i < info.numSysVals; ++i) + if (info.sv[i].sn == sn && + info.sv[i].si == si) + return; + info.numSysVals = i + 1; + info.sv[i].sn = sn; + info.sv[i].si = si; + info.sv[i].mask = mask; + info.sv[i].input = input ? 1 : 0; +} + +bool +Converter::parseSignature() +{ + struct nv50_ir_varying *patch; + unsigned int i, r, n; + + info.numInputs = 0; + info.numOutputs = 0; + info.numPatchConstants = 0; + + for (n = 0, i = 0; i < sm4.num_params_in; ++i) { + r = sm4.params_in[i].Register; + + info.in[r].mask |= sm4.params_in[i].ReadWriteMask; + // mask might be uninitialized ... 
+ if (!sm4.params_in[i].ReadWriteMask) + info.in[r].mask = 0xf; + info.in[r].id = r; + if (info.in[r].regular) // already assigned semantic name/index + continue; + info.in[r].regular = 1; + info.in[r].patch = 0; + + info.numInputs = MAX2(info.numInputs, r + 1); + + switch (sm4.params_in[i].SystemValueType) { + case D3D_NAME_UNDEFINED: + info.in[r].sn = TGSI_SEMANTIC_GENERIC; + info.in[r].si = n++; + break; + case D3D_NAME_POSITION: + info.in[r].sn = TGSI_SEMANTIC_POSITION; + break; + case D3D_NAME_VERTEX_ID: + info.in[r].sn = NV50_SEMANTIC_VERTEXID; + break; + case D3D_NAME_PRIMITIVE_ID: + info.in[r].sn = TGSI_SEMANTIC_PRIMID; + // no corresponding output + recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true); + break; + case D3D_NAME_INSTANCE_ID: + info.in[r].sn = TGSI_SEMANTIC_INSTANCEID; + break; + case D3D_NAME_IS_FRONT_FACE: + info.in[r].sn = TGSI_SEMANTIC_FACE; + // no corresponding output + recordSV(TGSI_SEMANTIC_FACE, 0, 1, true); + break; + default: + assert(!"invalid/unsupported input linkage semantic"); + break; + } + } + + for (n = 0, i = 0; i < sm4.num_params_out; ++i) { + r = sm4.params_out[i].Register; + + info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask; + info.out[r].id = r; + if (info.out[r].regular) // already assigned semantic name/index + continue; + info.out[r].regular = 1; + info.out[r].patch = 0; + + info.numOutputs = MAX2(info.numOutputs, r + 1); + + switch (sm4.params_out[i].SystemValueType) { + case D3D_NAME_UNDEFINED: + if (prog->getType() == Program::TYPE_FRAGMENT) { + info.out[r].sn = TGSI_SEMANTIC_COLOR; + info.out[r].si = info.prop.fp.numColourResults++; + } else { + info.out[r].sn = TGSI_SEMANTIC_GENERIC; + info.out[r].si = n++; + } + break; + case D3D_NAME_POSITION: + case D3D_NAME_DEPTH: + case D3D_NAME_DEPTH_GREATER_EQUAL: + case D3D_NAME_DEPTH_LESS_EQUAL: + info.out[r].sn = TGSI_SEMANTIC_POSITION; + info.io.fragDepth = r; + break; + case D3D_NAME_CULL_DISTANCE: + case D3D_NAME_CLIP_DISTANCE: + info.out[r].sn = 
NV50_SEMANTIC_CLIPDISTANCE; + info.out[r].si = sm4.params_out[i].SemanticIndex; + break; + case D3D_NAME_RENDER_TARGET_ARRAY_INDEX: + info.out[r].sn = NV50_SEMANTIC_LAYER; + break; + case D3D_NAME_VIEWPORT_ARRAY_INDEX: + info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX; + break; + case D3D_NAME_PRIMITIVE_ID: + info.out[r].sn = TGSI_SEMANTIC_PRIMID; + break; + case D3D_NAME_TARGET: + info.out[r].sn = TGSI_SEMANTIC_COLOR; + info.out[r].si = sm4.params_out[i].SemanticIndex; + break; + case D3D_NAME_COVERAGE: + info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK; + info.io.sampleMask = r; + break; + case D3D_NAME_SAMPLE_INDEX: + default: + assert(!"invalid/unsupported output linkage semantic"); + break; + } + } + + if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) + patch = &info.in[info.numInputs]; + else + patch = &info.out[info.numOutputs]; + + for (n = 0, i = 0; i < sm4.num_params_patch; ++i) { + r = sm4.params_patch[i].Register; + + patch[r].mask |= sm4.params_patch[i].Mask; + patch[r].id = r; + if (patch[r].regular) // already visited + continue; + patch[r].regular = 1; + patch[r].patch = 1; + + info.numPatchConstants = MAX2(info.numPatchConstants, r + 1); + + switch (sm4.params_patch[i].SystemValueType) { + case D3D_NAME_UNDEFINED: + patch[r].sn = TGSI_SEMANTIC_GENERIC; + patch[r].si = n++; + break; + case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR: + case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR: + case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR: + patch[r].sn = NV50_SEMANTIC_TESSFACTOR; + patch[r].si = sm4.params_patch[i].SemanticIndex; + break; + case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR: + case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR: + patch[r].sn = NV50_SEMANTIC_TESSFACTOR; + patch[r].si = sm4.params_patch[i].SemanticIndex + 4; + break; + default: + assert(!"invalid patch-constant linkage semantic"); + break; + } + } + if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) + info.numInputs += info.numPatchConstants; + else + 
info.numOutputs += info.numPatchConstants; + + return true; +} + +bool +Converter::inspectDeclaration(const sm4_dcl& dcl) +{ + int idx = -1; + enum sm4_interpolation ipa_mode; + + if (dcl.op.get() && dcl.op->is_index_simple(0)) + idx = dcl.op->indices[0].disp; + + switch (dcl.opcode) { + case SM4_OPCODE_DCL_SAMPLER: + assert(idx >= 0); + shadow[idx] = dcl.dcl_sampler.shadow; + break; + case SM4_OPCODE_DCL_RESOURCE: + { + enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target; + + assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES); + resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL); + resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL); + } + break; + case SM4_OPCODE_DCL_CONSTANT_BUFFER: + // nothing to do + break; + case SM4_OPCODE_CUSTOMDATA: + info.immd.bufSize = dcl.num * 4; + info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize); + memcpy(info.immd.buf, dcl.data, info.immd.bufSize); + break; + case SM4_OPCODE_DCL_INDEX_RANGE: + // XXX: ? + break; + case SM4_OPCODE_DCL_INPUT_PS_SGV: + case SM4_OPCODE_DCL_INPUT_PS_SIV: + case SM4_OPCODE_DCL_INPUT_PS: + { + assert(idx >= 0 && idx < info.numInputs); + ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation; + interpMode[idx] = cvtInterpMode(ipa_mode); + setVaryingInterpMode(&info.in[idx], interpMode[idx]); + } + break; + case SM4_OPCODE_DCL_INPUT_SGV: + case SM4_OPCODE_DCL_INPUT_SIV: + case SM4_OPCODE_DCL_INPUT: + if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) { + idx = info.numInputs++; + info.in[idx].sn = NV50_SEMANTIC_TESSCOORD; + info.in[idx].mask = dcl.op->mask; + } + // rest handled in parseSignature + break; + case SM4_OPCODE_DCL_OUTPUT_SGV: + case SM4_OPCODE_DCL_OUTPUT_SIV: + switch (dcl.sv) { + case SM4_SV_POSITION: + assert(prog->getType() != Program::TYPE_FRAGMENT); + break; + case SM4_SV_CULL_DISTANCE: // XXX: order ? 
+ info.io.cullDistanceMask |= 1 << info.io.clipDistanceCount; + // fall through + case SM4_SV_CLIP_DISTANCE: + info.io.clipDistanceCount++; + break; + default: + break; + } + switch (dcl.op->file) { + case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL: + case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL: + case SM4_FILE_OUTPUT_DEPTH: + if (info.io.fragDepth < 0xff) + break; + idx = info.io.fragDepth = info.numOutputs++; + info.out[idx].sn = TGSI_SEMANTIC_POSITION; + break; + case SM4_FILE_OUTPUT_COVERAGE_MASK: + if (info.io.sampleMask < 0xff) + break; + idx = info.io.sampleMask = info.numOutputs++; + info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK; + break; + default: + break; + } + break; + case SM4_OPCODE_DCL_OUTPUT: + // handled in parseSignature + break; + case SM4_OPCODE_DCL_TEMPS: + nrRegVals += dcl.num; + break; + case SM4_OPCODE_DCL_INDEXABLE_TEMP: + nrArrays++; + break; + case SM4_OPCODE_DCL_GLOBAL_FLAGS: + if (prog->getType() == Program::TYPE_FRAGMENT) + info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil; + break; + + case SM4_OPCODE_DCL_FUNCTION_BODY: + break; + case SM4_OPCODE_DCL_FUNCTION_TABLE: + break; + case SM4_OPCODE_DCL_INTERFACE: + break; + + // GP + case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + info.prop.gp.outputPrim = g3dPrim( + dcl.dcl_gs_output_primitive_topology.primitive_topology); + break; + case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: + info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive); + break; + case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + info.prop.gp.maxVertices = dcl.num; + break; + case SM4_OPCODE_DCL_GS_INSTANCE_COUNT: + info.prop.gp.instanceCount = dcl.num; + break; + case SM4_OPCODE_DCL_STREAM: + break; + + // TCP/TEP + case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + info.prop.tp.inputPatchSize = + dcl.dcl_input_control_point_count.control_points; + break; + case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + info.prop.tp.outputPatchSize = + dcl.dcl_output_control_point_count.control_points; + break; + case 
SM4_OPCODE_DCL_TESS_DOMAIN: + switch (dcl.dcl_tess_domain.domain) { + case D3D_TESSELLATOR_DOMAIN_ISOLINE: + info.prop.tp.domain = PIPE_PRIM_LINES; + break; + case D3D_TESSELLATOR_DOMAIN_TRI: + info.prop.tp.domain = PIPE_PRIM_TRIANGLES; + break; + case D3D_TESSELLATOR_DOMAIN_QUAD: + info.prop.tp.domain = PIPE_PRIM_QUADS; + break; + case D3D_TESSELLATOR_DOMAIN_UNDEFINED: + default: + info.prop.tp.domain = PIPE_PRIM_MAX; + break; + } + break; + case SM4_OPCODE_DCL_TESS_PARTITIONING: + switch (dcl.dcl_tess_partitioning.partitioning) { + case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD; + break; + case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN; + break; + case D3D_TESSELLATOR_PARTITIONING_POW2: + info.prop.tp.partitioning = NV50_TESS_PART_POW2; + break; + case D3D_TESSELLATOR_PARTITIONING_INTEGER: + case D3D_TESSELLATOR_PARTITIONING_UNDEFINED: + default: + info.prop.tp.partitioning = NV50_TESS_PART_INTEGER; + break; + } + break; + case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + switch (dcl.dcl_tess_output_primitive.primitive) { + case D3D_TESSELLATOR_OUTPUT_LINE: + info.prop.tp.outputPrim = PIPE_PRIM_LINES; + break; + case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW: + info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; + info.prop.tp.winding = +1; + break; + case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW: + info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; + info.prop.tp.winding = -1; + break; + case D3D_TESSELLATOR_OUTPUT_POINT: + info.prop.tp.outputPrim = PIPE_PRIM_POINTS; + break; + case D3D_TESSELLATOR_OUTPUT_UNDEFINED: + default: + info.prop.tp.outputPrim = PIPE_PRIM_MAX; + break; + } + break; + + case SM4_OPCODE_HS_FORK_PHASE: + ++subPhaseCnt[0]; + phase = 1; + break; + case SM4_OPCODE_HS_JOIN_PHASE: + phase = 2; + ++subPhaseCnt[1]; + break; + case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + case 
SM4_OPCODE_DCL_HS_MAX_TESSFACTOR: + break; + + // weird stuff + case SM4_OPCODE_DCL_THREAD_GROUP: + case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + case SM4_OPCODE_DCL_RESOURCE_RAW: + case SM4_OPCODE_DCL_RESOURCE_STRUCTURED: + ERROR("unhandled declaration\n"); + abort(); + return false; + + default: + assert(!"invalid SM4 declaration"); + return false; + } + return true; +} + +void +Converter::allocateValues() +{ + lData = new DataArray[nrArrays]; + + for (unsigned int i = 0; i < nrArrays; ++i) + lData[i].setParent(this); + + tData32.setup(0, nrRegVals, 4, 4, FILE_GPR); + tData64.setup(0, nrRegVals, 2, 8, FILE_GPR); + + if (prog->getType() == Program::TYPE_FRAGMENT) + oData.setup(0, info.numOutputs, 4, 4, FILE_GPR); +} + +bool Converter::handleDeclaration(const sm4_dcl& dcl) +{ + switch (dcl.opcode) { + case SM4_OPCODE_DCL_INDEXABLE_TEMP: + lData[nrArrays++].setup(arrayVol, + dcl.indexable_temp.num, dcl.indexable_temp.comps, + 4, FILE_MEMORY_LOCAL); + arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4; + break; + case SM4_OPCODE_HS_FORK_PHASE: + if (subPhaseCnt[0]) + phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1]; + ++subPhaseCnt[0]; + break; + case SM4_OPCODE_HS_JOIN_PHASE: + if (subPhaseCnt[1]) + phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1]; + ++subPhaseCnt[1]; + break; + case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num; + break; + case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num; + break; + + default: + break; // already handled in inspection + } + + return true; +} + +Symbol * +Converter::iSym(int i, int c) +{ + if (info.in[i].regular) { + return mkSymbol(FILE_SHADER_INPUT, 0, sTy, 
info.in[i].slot[c] * 4); + } else { + return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si); + } +} + +Symbol * +Converter::oSym(int i, int c) +{ + if (info.out[i].regular) { + return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4); + } else { + return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si); + } +} + +Value * +Converter::getSrcPtr(int s, int dim, int shl) +{ + if (srcPtr[s][dim]) + return srcPtr[s][dim]; + + sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get(); + + if (!op) + return NULL; + + Value *index = src(*op, 0, s); + + srcPtr[s][dim] = index; + if (shl) + srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl)); + return srcPtr[s][dim]; +} + +Value * +Converter::getDstPtr(int d, int dim, int shl) +{ + assert(d == 0); + if (dstPtr[dim]) + return dstPtr[dim]; + + sm4_op *op = insn->ops[d]->indices[dim].reg.get(); + if (!op) + return NULL; + + Value *index = src(*op, 0, d); + if (shl) + index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl)); + + return (dstPtr[dim] = index); +} + +Value * +Converter::getVtxPtr(int s) +{ + assert(s < 3); + if (vtxBase[s]) + return vtxBase[s]; + + sm4_op *op = insn->ops[s + nDstOpnds].get(); + if (!op) + return NULL; + int idx = op->indices[0].disp; + + vtxBase[s] = getSrcPtr(s, 0, 0); + vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]); + return vtxBase[s]; +} + +Value * +Converter::src(int i, int c) +{ + return src(*insn->ops[i + nDstOpnds], c, i); +} + +Value * +Converter::dst(int i, int c) +{ + return dst(*insn->ops[i], c, i); +} + +void +Converter::saveDst(int i, int c, Value *value) +{ + if (insn->insn.sat) + mkOp1(OP_SAT, dTy, value, value); + return saveDst(*insn->ops[i], c, value, i); +} + +Value * +Converter::interpolate(const sm4_op& op, int c, int i) +{ + int idx = op.indices[0].disp; + int swz = op.swizzle[c]; + operation opr = + (info.in[idx].linear || info.in[idx].flat) ? 
OP_LINTERP : OP_PINTERP; + + Value *ptr = getSrcPtr(i, 0, 4); + + Instruction *insn = new_Instruction(func, opr, TYPE_F32); + + insn->setDef(0, getScratch()); + insn->setSrc(0, iSym(idx, swz)); + if (opr == OP_PINTERP) + insn->setSrc(1, fragCoord[3]); + if (ptr) + insn->setIndirect(0, 0, ptr); + + insn->setInterpolate(interpMode[idx]); + + bb->insertTail(insn); + return insn->getDef(0); +} + +Value * +Converter::src(const sm4_op& op, int c, int s) +{ + const int size = typeSizeof(sTy); + + Instruction *ld; + Value *res, *ptr, *vtx; + int idx, dim, off; + const int swz = op.swizzle[c]; + + switch (op.file) { + case SM4_FILE_IMMEDIATE32: + res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32); + break; + case SM4_FILE_IMMEDIATE64: + assert(c < 2); + res = loadImm(NULL, op.imm_values[swz].u64); + break; + case SM4_FILE_TEMP: + assert(op.is_index_simple(0)); + idx = op.indices[0].disp; + if (size == 8) + res = tData64.load(idx, swz, NULL); + else + res = tData32.load(idx, swz, NULL); + break; + case SM4_FILE_INPUT: + case SM4_FILE_INPUT_CONTROL_POINT: + case SM4_FILE_INPUT_PATCH_CONSTANT: + if (prog->getType() == Program::TYPE_FRAGMENT) + return interpolate(op, c, s); + + idx = 0; + if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT) + idx = info.numInputs - info.numPatchConstants; + + if (op.num_indices == 2) { + vtx = getVtxPtr(s); + ptr = getSrcPtr(s, 1, 4); + idx += op.indices[1].disp; + res = getSSA(); + ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz)); + ld->setIndirect(0, 0, ptr); + ld->setIndirect(0, 1, vtx); + } else { + idx += op.indices[0].disp; + res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4)); + } + if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT) + res->defs->getInsn()->perPatch = 1; + break; + case SM4_FILE_CONSTANT_BUFFER: + assert(op.num_indices == 2); + assert(op.is_index_simple(0)); + + ptr = getSrcPtr(s, 1, 4); + dim = op.indices[0].disp; + off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 
8 : 4); + + res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr); + break; + case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER: + ptr = getSrcPtr(s, 0, 4); + off = (op.indices[0].disp * 4 + swz) * 4; + res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr); + break; + case SM4_FILE_INDEXABLE_TEMP: + { + assert(op.is_index_simple(0)); + int a = op.indices[0].disp; + idx = op.indices[1].disp; + res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4)); + } + break; + case SM4_FILE_INPUT_PRIMITIVEID: + recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true); + res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0)); + break; + case SM4_FILE_INPUT_GS_INSTANCE_ID: + case SM4_FILE_OUTPUT_CONTROL_POINT_ID: + recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true); + res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)); + break; + case SM4_FILE_CYCLE_COUNTER: + res = + mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0)); + break; + case SM4_FILE_INPUT_FORK_INSTANCE_ID: + case SM4_FILE_INPUT_JOIN_INSTANCE_ID: + { + phaseInstanceUsed = true; + if (unrollPhase) + return loadImm(NULL, phaseInstance); + const unsigned int cnt = phaseInstCnt[phase - 1][subPhase]; + res = getScratch(); + res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0)); + res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1)); + } + break; + case SM4_FILE_INPUT_DOMAIN_POINT: + assert(swz < 3); + res = domainPt[swz]; + break; + case SM4_FILE_THREAD_GROUP_SHARED_MEMORY: + off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 
8 : 4); + ptr = getSrcPtr(s, 0, 4); + res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr); + break; + case SM4_FILE_RESOURCE: + case SM4_FILE_SAMPLER: + case SM4_FILE_UNORDERED_ACCESS_VIEW: + return NULL; + case SM4_FILE_INPUT_THREAD_ID: + res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz)); + break; + case SM4_FILE_INPUT_THREAD_GROUP_ID: + res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz)); + break; + case SM4_FILE_FUNCTION_INPUT: + case SM4_FILE_INPUT_THREAD_ID_IN_GROUP: + assert(!"unhandled source file"); + return NULL; + default: + assert(!"invalid source file"); + return NULL; + } + + if (op.abs) + res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res); + if (op.neg) + res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res); + return res; +} + +Value * +Converter::dst(const sm4_op &op, int c, int i) +{ + switch (op.file) { + case SM4_FILE_TEMP: + return tData32.acquire(op.indices[0].disp, c); + case SM4_FILE_INDEXABLE_TEMP: + return getScratch(); + case SM4_FILE_OUTPUT: + if (prog->getType() == Program::TYPE_FRAGMENT) + return oData.acquire(op.indices[0].disp, c); + return getScratch(); + case SM4_FILE_NULL: + return NULL; + case SM4_FILE_OUTPUT_DEPTH: + case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL: + case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL: + case SM4_FILE_OUTPUT_COVERAGE_MASK: + return getScratch(); + case SM4_FILE_IMMEDIATE32: + case SM4_FILE_IMMEDIATE64: + case SM4_FILE_CONSTANT_BUFFER: + case SM4_FILE_RESOURCE: + case SM4_FILE_SAMPLER: + case SM4_FILE_UNORDERED_ACCESS_VIEW: + assert(!"invalid destination file"); + return NULL; + default: + assert(!"invalid file"); + return NULL; + } +} + +void +Converter::saveFragDepth(operation op, Value *value) +{ + if (op == OP_MIN || op == OP_MAX) { + Value *zIn; + zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2)); + value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn); + } + oData.store(info.io.fragDepth, 2, NULL, value); +} + +void +Converter::saveDst(const sm4_op 
&op, int c, Value *value, int s) +{ + Symbol *sym; + Instruction *st; + int a, idx; + + switch (op.file) { + case SM4_FILE_TEMP: + idx = op.indices[0].disp; + tData32.store(idx, c, NULL, value); + break; + case SM4_FILE_INDEXABLE_TEMP: + a = op.indices[0].disp; + idx = op.indices[1].disp; + // FIXME: shift is wrong, depends in lData + lData[a].store(idx, c, getDstPtr(s, 1, 4), value); + break; + case SM4_FILE_OUTPUT: + assert(op.num_indices == 1); + idx = op.indices[0].disp; + if (prog->getType() == Program::TYPE_FRAGMENT) { + oData.store(idx, c, NULL, value); + } else { + if (phase) + idx += info.numOutputs - info.numPatchConstants; + const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4; + sym = oSym(idx, c); + if (sym->reg.file == FILE_SHADER_OUTPUT) + st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value); + else + st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value); + st->perPatch = phase ? 1 : 0; + } + break; + case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL: + saveFragDepth(OP_MAX, value); + break; + case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL: + saveFragDepth(OP_MIN, value); + break; + case SM4_FILE_OUTPUT_DEPTH: + saveFragDepth(OP_NOP, value); + break; + case SM4_FILE_OUTPUT_COVERAGE_MASK: + oData.store(info.io.sampleMask, 0, NULL, value); + break; + case SM4_FILE_IMMEDIATE32: + case SM4_FILE_IMMEDIATE64: + case SM4_FILE_INPUT: + case SM4_FILE_CONSTANT_BUFFER: + case SM4_FILE_RESOURCE: + case SM4_FILE_SAMPLER: + assert(!"invalid destination file"); + return; + default: + assert(!"invalid file"); + return; + } +} + +void +Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4]) +{ + Value *res[4] = { NULL, NULL, NULL, NULL }; + unsigned int c, d; + + for (c = 0; c < 4; ++c) + if (dst0[c]) + tex->tex.mask |= 1 << swz[c]; + for (d = 0, c = 0; c < 4; ++c) + if (tex->tex.mask & (1 << c)) + tex->setDef(d++, (res[c] = getScratch())); + + bb->insertTail(tex); + + if (insn->opcode == SM4_OPCODE_RESINFO) { + if 
(tex->tex.target.getDim() == 1) { + res[2] = loadImm(NULL, 0); + if (!tex->tex.target.isArray()) + res[1] = res[2]; + } else + if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) { + res[2] = loadImm(NULL, 0); + } + for (c = 0; c < 4; ++c) { + if (!dst0[c]) + continue; + Value *src = res[swz[c]]; + assert(src); + switch (insn->insn.resinfo_return_type) { + case 0: + mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src); + break; + case 1: + mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src); + if (swz[c] < tex->tex.target.getDim()) + mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]); + break; + default: + mkMov(dst0[c], src); + break; + } + } + } else { + for (c = 0; c < 4; ++c) + if (dst0[c]) + mkMov(dst0[c], res[swz[c]]); + } +} + +void +Converter::handleQUERY(Value *dst0[4], enum TexQuery query) +{ + TexInstruction *texi = new_TexInstruction(func, OP_TXQ); + texi->tex.query = query; + + assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs + + const int rOp = (query == TXQ_DIMS) ? 2 : 1; + const int sOp = (query == TXQ_DIMS) ? 
0 : 1; + + const int tR = insn->ops[rOp]->indices[0].disp; + + texi->setTexture(resourceType[tR][0], tR, 0); + + texi->setSrc(0, src(sOp, 0)); // mip level or sample index + + emitTex(dst0, texi, insn->ops[rOp]->swizzle); +} + +void +Converter::handleLOAD(Value *dst0[4]) +{ + TexInstruction *texi = new_TexInstruction(func, OP_TXF); + unsigned int c; + + const int tR = insn->ops[2]->indices[0].disp; + + texi->setTexture(resourceType[tR][0], tR, 0); + + for (c = 0; c < texi->tex.target.getArgCount(); ++c) + texi->setSrc(c, src(0, c)); + + if (texi->tex.target == TEX_TARGET_BUFFER) { + texi->tex.levelZero = true; + } else { + texi->setSrc(c++, src(0, 3)); + for (c = 0; c < 3; ++c) { + texi->tex.offset[0][c] = insn->sample_offset[c]; + if (texi->tex.offset[0][c]) + texi->tex.useOffsets = 1; + } + } + + emitTex(dst0, texi, insn->ops[2]->swizzle); +} + +// order of nv50 ir sources: x y z/layer lod/bias dc +void +Converter::handleSAMPLE(operation opr, Value *dst0[4]) +{ + TexInstruction *texi = new_TexInstruction(func, opr); + unsigned int c, s; + Value *arg[4], *src0[4]; + Value *val; + Value *lod = NULL, *dc = NULL; + + const int tR = insn->ops[2]->indices[0].disp; + const int tS = insn->ops[3]->indices[0].disp; + + TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 
1 : 0]; + + for (c = 0; c < tgt.getArgCount(); ++c) + arg[c] = src0[c] = src(0, c); + + if (insn->opcode == SM4_OPCODE_SAMPLE_L || + insn->opcode == SM4_OPCODE_SAMPLE_B) { + lod = src(3, 0); + } else + if (insn->opcode == SM4_OPCODE_SAMPLE_C || + insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) { + dc = src(3, 0); + if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) + texi->tex.levelZero = true; + } else + if (insn->opcode == SM4_OPCODE_SAMPLE_D) { + for (c = 0; c < tgt.getDim(); ++c) { + texi->dPdx[c] = src(3, c); + texi->dPdy[c] = src(4, c); + } + } + + if (tgt.isCube()) { + for (c = 0; c < 3; ++c) + src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); + val = getScratch(); + mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]); + mkOp2(OP_MAX, TYPE_F32, val, src0[2], val); + mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) + src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); + } + + for (s = 0; s < tgt.getArgCount(); ++s) + texi->setSrc(s, src0[s]); + if (lod) + texi->setSrc(s++, lod); + if (dc) + texi->setSrc(s++, dc); + + for (c = 0; c < 3; ++c) { + texi->tex.offset[0][c] = insn->sample_offset[c]; + if (texi->tex.offset[0][c]) + texi->tex.useOffsets = 1; + } + + texi->setTexture(tgt, tR, tS); + + emitTex(dst0, texi, insn->ops[2]->swizzle); +} + +void +Converter::handleDP(Value *dst0[4], int dim) +{ + Value *src0 = src(0, 0), *src1 = src(1, 0); + Value *dotp = getScratch(); + + assert(dim > 0); + + mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); + for (int c = 1; c < dim; ++c) + mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp); + + for (int c = 0; c < 4; ++c) + dst0[c] = dotp; +} + +void +Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) +{ + FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); + join->fixed = 1; + conv->insertHead(join); + + fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); + fork->insertBefore(fork->getExit(), fork->joinAt); +} + +void +Converter::finalizeShader() +{ + if (finalized) + return; + 
BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p); + entryBBs.pop(); + + finalized = true; + + bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); + setPosition(epilogue, true); + + if (prog->getType() == Program::TYPE_FRAGMENT) + exportOutputs(); + + mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; +} + +#define FOR_EACH_DST0_ENABLED_CHANNEL32(chan) \ + for ((chan) = 0; (chan) < 4; ++(chan)) \ + if (insn->ops[0].get()->mask & (1 << (chan))) + +#define FOR_EACH_DST0_ENABLED_CHANNEL64(chan) \ + for ((chan) = 0; (chan) < 2; ++(chan)) \ + if (insn->ops[0].get()->mask & (1 << (chan))) + +bool +Converter::checkDstSrcAliasing() const +{ + for (unsigned int d = 0; d < nDstOpnds; ++d) { + for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) { + if (insn->ops[d]->file != insn->ops[s]->file) + continue; + int i = insn->ops[s]->num_indices - 1; + if (i != insn->ops[d]->num_indices - 1) + continue; + if (insn->ops[d]->is_index_simple(i) && + insn->ops[s]->is_index_simple(i) && + insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp) + return true; + } + } + return false; +} + +bool +Converter::handleInstruction(unsigned int pos) +{ + Value *dst0[4], *rDst0[4]; + Value *dst1[4], *rDst1[4]; + int c, nc; + + insn = sm4.insns[pos]; + enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode); + + operation op = cvtOpcode(opcode); + + sTy = inferSrcType(opcode); + dTy = inferDstType(opcode); + + nc = dTy == TYPE_F64 ? 2 : 4; + + nDstOpnds = getDstOpndCount(opcode); + + bool useScratchDst = checkDstSrcAliasing(); + + INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst); + + if (nDstOpnds >= 1) { + for (c = 0; c < nc; ++c) + rDst0[c] = dst0[c] = + insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL; + if (useScratchDst) + for (c = 0; c < nc; ++c) + dst0[c] = rDst0[c] ? getScratch() : NULL; + } + + if (nDstOpnds >= 2) { + for (c = 0; c < nc; ++c) + rDst1[c] = dst1[c] = + insn->ops[1].get()->mask & (1 << c) ? 
dst(1, c) : NULL; + if (useScratchDst) + for (c = 0; c < nc; ++c) + dst1[c] = rDst1[c] ? getScratch() : NULL; + } + + switch (insn->opcode) { + case SM4_OPCODE_ADD: + case SM4_OPCODE_AND: + case SM4_OPCODE_DIV: + case SM4_OPCODE_IADD: + case SM4_OPCODE_IMAX: + case SM4_OPCODE_IMIN: + case SM4_OPCODE_MIN: + case SM4_OPCODE_MAX: + case SM4_OPCODE_MUL: + case SM4_OPCODE_OR: + case SM4_OPCODE_UMAX: + case SM4_OPCODE_UMIN: + case SM4_OPCODE_XOR: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c)); + if (dTy == TYPE_F32) + insn->ftz = 1; + } + break; + + case SM4_OPCODE_ISHL: + case SM4_OPCODE_ISHR: + case SM4_OPCODE_USHR: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c)); + insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP; + } + break; + + case SM4_OPCODE_IMAD: + case SM4_OPCODE_MAD: + case SM4_OPCODE_UMAD: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c)); + } + break; + + case SM4_OPCODE_DADD: + case SM4_OPCODE_DMAX: + case SM4_OPCODE_DMIN: + case SM4_OPCODE_DMUL: + FOR_EACH_DST0_ENABLED_CHANNEL64(c) { + mkOp2(op, dTy, dst0[c], src(0, c), src(1, c)); + } + break; + + case SM4_OPCODE_UDIV: + for (c = 0; c < 4; ++c) { + Value *dvn, *dvs; + if (dst0[c] || dst1[c]) { + dvn = src(0, c); + dvs = src(1, c); + } + if (dst0[c]) + mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs); + if (dst1[c]) + mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs); + } + break; + + case SM4_OPCODE_IMUL: + case SM4_OPCODE_UMUL: + for (c = 0; c < 4; ++c) { + Value *a, *b; + if (dst0[c] || dst1[c]) { + a = src(0, c); + b = src(1, c); + } + if (dst0[c]) + mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp = + NV50_IR_SUBOP_MUL_HIGH; + if (dst1[c]) + mkOp2(OP_MUL, dTy, dst1[c], a, b); + } + break; + + case SM4_OPCODE_DP2: + handleDP(dst0, 2); + break; + case SM4_OPCODE_DP3: + handleDP(dst0, 3); + break; + case SM4_OPCODE_DP4: + handleDP(dst0, 4); + break; + + case 
SM4_OPCODE_DERIV_RTX: + case SM4_OPCODE_DERIV_RTX_COARSE: + case SM4_OPCODE_DERIV_RTX_FINE: + case SM4_OPCODE_DERIV_RTY: + case SM4_OPCODE_DERIV_RTY_COARSE: + case SM4_OPCODE_DERIV_RTY_FINE: + case SM4_OPCODE_MOV: + case SM4_OPCODE_INEG: + case SM4_OPCODE_NOT: + case SM4_OPCODE_SQRT: + case SM4_OPCODE_COUNTBITS: + case SM4_OPCODE_EXP: + case SM4_OPCODE_LOG: + case SM4_OPCODE_RCP: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + mkOp1(op, dTy, dst0[c], src(0, c)); + } + break; + + case SM4_OPCODE_FRC: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + Value *val = getScratch(); + Value *src0 = src(0, c); + mkOp1(OP_FLOOR, TYPE_F32, val, src0); + mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val); + } + break; + + case SM4_OPCODE_MOVC: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) + mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c), + src(0, c)); + break; + + case SM4_OPCODE_ROUND_NE: + case SM4_OPCODE_ROUND_NI: + case SM4_OPCODE_ROUND_PI: + case SM4_OPCODE_ROUND_Z: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c)); + rnd->ftz = 1; + rnd->rnd = cvtRoundingMode(opcode); + } + break; + + case SM4_OPCODE_RSQ: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) + mkOp1(op, dTy, dst0[c], src(0, c)); + break; + + case SM4_OPCODE_SINCOS: + for (c = 0; c < 4; ++c) { + if (!dst0[c] && !dst1[c]) + continue; + Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c)); + if (dst0[c]) + mkOp1(OP_SIN, TYPE_F32, dst0[c], val); + if (dst1[c]) + mkOp1(OP_COS, TYPE_F32, dst1[c], val); + } + break; + + case SM4_OPCODE_EQ: + case SM4_OPCODE_GE: + case SM4_OPCODE_IEQ: + case SM4_OPCODE_IGE: + case SM4_OPCODE_ILT: + case SM4_OPCODE_LT: + case SM4_OPCODE_NE: + case SM4_OPCODE_INE: + case SM4_OPCODE_ULT: + case SM4_OPCODE_UGE: + case SM4_OPCODE_DEQ: + case SM4_OPCODE_DGE: + case SM4_OPCODE_DLT: + case SM4_OPCODE_DNE: + { + CondCode cc = cvtCondCode(opcode); + FOR_EACH_DST0_ENABLED_CHANNEL32(c) { + CmpInstruction *set; + set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), 
NULL); + set->setType(dTy, sTy); + if (sTy == TYPE_F32) + set->ftz = 1; + } + } + break; + + case SM4_OPCODE_FTOI: + case SM4_OPCODE_FTOU: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) + mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z; + break; + case SM4_OPCODE_ITOF: + case SM4_OPCODE_UTOF: + case SM4_OPCODE_F32TOF16: + case SM4_OPCODE_F16TOF32: + case SM4_OPCODE_DTOF: + case SM4_OPCODE_FTOD: + FOR_EACH_DST0_ENABLED_CHANNEL32(c) + mkCvt(op, dTy, dst0[c], sTy, src(0, c)); + break; + + case SM4_OPCODE_CUT: + case SM4_OPCODE_CUT_STREAM: + mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1; + break; + case SM4_OPCODE_EMIT: + case SM4_OPCODE_EMIT_STREAM: + mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1; + break; + case SM4_OPCODE_EMITTHENCUT: + case SM4_OPCODE_EMITTHENCUT_STREAM: + { + Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0)); + cut->fixed = 1; + cut->subOp = NV50_IR_SUBOP_EMIT_RESTART; + } + break; + + case SM4_OPCODE_DISCARD: + info.prop.fp.usesDiscard = TRUE; + mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate( + insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0)); + break; + + case SM4_OPCODE_CALL: + case SM4_OPCODE_CALLC: + assert(!"CALL/CALLC not implemented"); + break; + + case SM4_OPCODE_RET: + // XXX: the following doesn't work with subroutines / early ret + if (!haveNextPhase(pos)) + finalizeShader(); + else + phaseEnded = phase + 1; + break; + + case SM4_OPCODE_IF: + { + BasicBlock *ifClause = new BasicBlock(func); + + bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE); + condBBs.push(bb); + joinBBs.push(bb); + + mkFlow(OP_BRA, NULL, insn->insn.test_nz ? 
CC_NOT_P : CC_P, src(0, 0)); + + setPosition(ifClause, true); + } + break; + case SM4_OPCODE_ELSE: + { + BasicBlock *elseClause = new BasicBlock(func); + BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); + + forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE); + condBBs.push(bb); + + forkPoint->getExit()->asFlow()->target.bb = elseClause; + if (!bb->isTerminated()) + mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); + + setPosition(elseClause, true); + } + break; + case SM4_OPCODE_ENDIF: + { + BasicBlock *convPoint = new BasicBlock(func); + BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); + BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p); + + if (!bb->isTerminated()) { + // we only want join if none of the clauses ended with CONT/BREAK/RET + if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) + insertConvergenceOps(convPoint, forkPoint); + mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL); + bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD); + } + + if (lastBB->getExit()->op == OP_BRA) { + lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD); + lastBB->getExit()->asFlow()->target.bb = convPoint; + } + setPosition(convPoint, true); + } + break; + + case SM4_OPCODE_SWITCH: + case SM4_OPCODE_CASE: + case SM4_OPCODE_ENDSWITCH: + assert(!"SWITCH/CASE/ENDSWITCH not implemented"); + break; + + case SM4_OPCODE_LOOP: + { + BasicBlock *loopHeader = new BasicBlock(func); + BasicBlock *loopBreak = new BasicBlock(func); + + loopBBs.push(loopHeader); + breakBBs.push(loopBreak); + if (loopBBs.getSize() > func->loopNestingBound) + func->loopNestingBound++; + + mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL); + + bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE); + setPosition(loopHeader, true); + mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL); + } + break; + case SM4_OPCODE_ENDLOOP: + { + BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p); + + if (!bb->isTerminated()) { + 
mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); + bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); + } + setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true); + } + break; + case SM4_OPCODE_BREAK: + { + if (bb->isTerminated()) + break; + BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); + mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL); + bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS); + } + break; + case SM4_OPCODE_BREAKC: + { + BasicBlock *nextBB = new BasicBlock(func); + BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); + CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P; + mkFlow(OP_BREAK, breakBB, cc, src(0, 0)); + bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS); + bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD); + setPosition(nextBB, true); + } + break; + case SM4_OPCODE_CONTINUE: + { + if (bb->isTerminated()) + break; + BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); + mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); + contBB->explicitCont = true; + bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); + } + break; + case SM4_OPCODE_CONTINUEC: + { + BasicBlock *nextBB = new BasicBlock(func); + BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); + mkFlow(OP_CONT, contBB, insn->insn.test_nz ? 
CC_P : CC_NOT_P, src(0, 0)); + bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); + bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD); + setPosition(nextBB, true); + } + break; + + case SM4_OPCODE_SAMPLE: + case SM4_OPCODE_SAMPLE_C: + case SM4_OPCODE_SAMPLE_C_LZ: + case SM4_OPCODE_SAMPLE_L: + case SM4_OPCODE_SAMPLE_D: + case SM4_OPCODE_SAMPLE_B: + handleSAMPLE(op, dst0); + break; + case SM4_OPCODE_LD: + case SM4_OPCODE_LD_MS: + handleLOAD(dst0); + break; + + case SM4_OPCODE_GATHER4: + assert(!"GATHER4 not implemented\n"); + break; + + case SM4_OPCODE_RESINFO: + handleQUERY(dst0, TXQ_DIMS); + break; + case SM4_OPCODE_SAMPLE_POS: + handleQUERY(dst0, TXQ_SAMPLE_POSITION); + break; + + case SM4_OPCODE_NOP: + mkOp(OP_NOP, TYPE_NONE, NULL); + break; + + case SM4_OPCODE_HS_DECLS: + // XXX: any significance ? + break; + case SM4_OPCODE_HS_CONTROL_POINT_PHASE: + phase = 0; + break; + case SM4_OPCODE_HS_FORK_PHASE: + if (phase != 1) + subPhase = 0; + phase = 1; + phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0; + phaseStart = pos; + if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase]) + unrollPhase = true; + break; + case SM4_OPCODE_HS_JOIN_PHASE: + if (phase != 2) + subPhase = 0; + phase = 2; + phaseInstance = (phaseStart == pos) ? 
(phaseInstance + 1) : 0; + phaseStart = pos; + if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase]) + unrollPhase = true; + break; + + default: + ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode); + abort(); + return false; + } + + for (c = 0; c < nc; ++c) { + if (nDstOpnds >= 1 && rDst0[c]) { + if (dst0[c] != rDst0[c]) + mkMov(rDst0[c], dst0[c]); + saveDst(0, c, rDst0[c]); + } + if (nDstOpnds >= 2 && rDst1[c]) { + if (dst1[c] != rDst1[c]) + mkMov(rDst1[c], dst1[c]); + saveDst(1, c, rDst1[c]); + } + } + + memset(srcPtr, 0, sizeof(srcPtr)); + memset(dstPtr, 0, sizeof(dstPtr)); + memset(vtxBase, 0, sizeof(vtxBase)); + return true; +} + +void +Converter::exportOutputs() +{ + for (int i = 0; i < info.numOutputs; ++i) { + for (int c = 0; c < 4; ++c) { + if (!oData.exists(i, c)) + continue; + Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, + info.out[i].slot[c] * 4); + Value *val = oData.load(i, c, NULL); + if (val) + mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); + } + } +} + +Converter::Converter(Program *p, struct nv50_ir_prog_info *s) + : tData32(this), + tData64(this), + oData(this), + info(*s), + sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)), + prog(p) +{ + memset(srcPtr, 0, sizeof(srcPtr)); + memset(dstPtr, 0, sizeof(dstPtr)); + memset(vtxBase, 0, sizeof(vtxBase)); + + memset(interpMode, 0, sizeof(interpMode)); + + nrRegVals = nrArrays = arrayVol = 0; + + for (phase = 3; phase > 0; --phase) + for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) + out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT; + + unrollPhase = false; + phaseStart = 0; + subPhaseCnt[0] = subPhaseCnt[1] = 0; +} + +Converter::~Converter() +{ + if (lData) + delete[] lData; + + if (subPhaseCnt[0]) + delete[] phaseInstCnt[0]; + if (subPhaseCnt[1]) + delete[] phaseInstCnt[1]; +} + +bool +Converter::haveNextPhase(unsigned int pos) const +{ + ++pos; + return (pos < sm4.insns.size()) && + (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE || + 
sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE); +} + +bool +Converter::run() +{ + parseSignature(); + + for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos) + inspectDeclaration(*sm4.dcls[pos]); + + phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]]; + phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]]; + for (int i = 0; i < subPhaseCnt[0]; ++i) + phaseInstCnt[0][i] = -1; + for (int i = 0; i < subPhaseCnt[1]; ++i) + phaseInstCnt[1][i] = -1; + // re-increased in handleDeclaration: + subPhaseCnt[0] = subPhaseCnt[1] = 0; + + allocateValues(); + nrArrays = 0; + for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos) + handleDeclaration(*sm4.dcls[pos]); + + info.assignSlots(&info); + + if (sm4.dcls.size() == 0 && sm4.insns.size() == 0) + return true; + + BasicBlock *entry = new BasicBlock(prog->main); + BasicBlock *leave = new BasicBlock(prog->main); + + prog->main->setEntry(entry); + prog->main->setExit(leave); + + setPosition(entry, true); + + entryBBs.push(entry); + leaveBBs.push(leave); + + if (prog->getType() == Program::TYPE_FRAGMENT) { + Symbol *sv = mkSysVal(SV_POSITION, 3); + fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); + mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); + } else + if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) { + const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 
3 : 2;
+      int c;
+      for (c = 0; c < n; ++c)
+         domainPt[c] =
+            mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
+      if (c == 2)
+         domainPt[2] = loadImm(NULL, 0.0f);
+   }
+
+   finalized = false;
+   phaseEnded = 0;
+   phase = 0;
+   subPhase = 0;
+   for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
+      handleInstruction(pos);
+      if (likely(phase == 0) || (phaseEnded < 2))
+         continue;
+      phaseEnded = 0;
+      if (!unrollPhase || !phaseInstanceUsed) {
+         ++subPhase;
+         continue;
+      }
+      phaseInstanceUsed = false;
+      if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
+         pos = phaseStart - 1; // minus one: the loop's ++pos then lands on phaseStart
+      else
+         ++subPhase;
+   }
+   finalizeShader();
+
+   return true;
+}
+
+} // anonymous namespace
+
+namespace nv50_ir {
+
+bool
+Program::makeFromSM4(struct nv50_ir_prog_info *info)
+{
+   Converter bld(this, info); // the converter is handed this Program and the caller's info
+   return bld.run(); // run()'s result is passed straight back to the caller
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h
new file mode 100644
index 00000000000..3c7b55aa1d4
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h
@@ -0,0 +1,183 @@
+
+#ifndef __NV50_IR_FROM_SM4_H__
+#define __NV50_IR_FROM_SM4_H__
+
+typedef enum D3D_PRIMITIVE_TOPOLOGY { // primitive topology codes, declared locally in this header
+   D3D_PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
+   D3D_PRIMITIVE_TOPOLOGY_POINTLIST = 1,
+   D3D_PRIMITIVE_TOPOLOGY_LINELIST = 2,
+   D3D_PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
+   D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
+   D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
+   D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, // values 6..9 have no enumerator here
+   D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
+   D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
+   D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
+   D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33, // N_CONTROL_POINT_PATCHLIST is 32 + N, for N = 1..32
+   D3D_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST = 34,
+   D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST = 35,
+   D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST = 36,
+   D3D_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST = 37,
+   D3D_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST = 38,
+   D3D_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST = 39,
+   D3D_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST = 40,
+   D3D_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST = 41,
+   D3D_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST = 42,
+   D3D_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST = 43,
+   D3D_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST = 44,
+   D3D_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST = 45,
+   D3D_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST = 46,
+   D3D_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST = 47,
+   D3D_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST = 48,
+   D3D_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST = 49,
+   D3D_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST = 50,
+   D3D_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST = 51,
+   D3D_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST = 52,
+   D3D_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST = 53,
+   D3D_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST = 54,
+   D3D_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST = 55,
+   D3D_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST = 56,
+   D3D_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST = 57,
+   D3D_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST = 58,
+   D3D_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST = 59,
+   D3D_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST = 60,
+   D3D_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST = 61,
+   D3D_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST = 62,
+   D3D_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST = 63,
+   D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST = 64,
+} D3D_PRIMITIVE_TOPOLOGY;
+
+typedef enum D3D_RESOURCE_RETURN_TYPE { // resource return type codes 1..8
+   D3D_RETURN_TYPE_UNORM = 1,
+   D3D_RETURN_TYPE_SNORM = 2,
+   D3D_RETURN_TYPE_SINT = 3,
+   D3D_RETURN_TYPE_UINT = 4,
+   D3D_RETURN_TYPE_FLOAT = 5,
+   D3D_RETURN_TYPE_MIXED = 6,
+   D3D_RETURN_TYPE_DOUBLE = 7,
+   D3D_RETURN_TYPE_CONTINUED = 8,
+   D3D10_RETURN_TYPE_UNORM = 1, // D3D10_* aliases repeat values 1..6; no D3D10_ alias for DOUBLE/CONTINUED
+   D3D10_RETURN_TYPE_SNORM = 2,
+   D3D10_RETURN_TYPE_SINT = 3,
+   D3D10_RETURN_TYPE_UINT = 4,
+   D3D10_RETURN_TYPE_FLOAT = 5,
+   D3D10_RETURN_TYPE_MIXED = 6,
+   D3D11_RETURN_TYPE_UNORM = 1, // D3D11_* aliases repeat all eight values
+   D3D11_RETURN_TYPE_SNORM = 2,
+   D3D11_RETURN_TYPE_SINT = 3,
+   D3D11_RETURN_TYPE_UINT = 4,
+   D3D11_RETURN_TYPE_FLOAT = 5,
+   D3D11_RETURN_TYPE_MIXED = 6,
+   D3D11_RETURN_TYPE_DOUBLE = 7,
+   D3D11_RETURN_TYPE_CONTINUED = 8
+} D3D_RESOURCE_RETURN_TYPE;
+
+typedef enum D3D_REGISTER_COMPONENT_TYPE { // component type codes 0..3
+   D3D_REGISTER_COMPONENT_UNKNOWN = 0,
+   D3D_REGISTER_COMPONENT_UINT32 = 1,
+   D3D_REGISTER_COMPONENT_SINT32 = 2,
+   D3D_REGISTER_COMPONENT_FLOAT32 = 3,
+   D3D10_REGISTER_COMPONENT_UNKNOWN = 0, // D3D10_* aliases carry the same values as the D3D_* names
+   D3D10_REGISTER_COMPONENT_UINT32 = 1,
+   D3D10_REGISTER_COMPONENT_SINT32 = 2,
+   D3D10_REGISTER_COMPONENT_FLOAT32 = 3
+} D3D_REGISTER_COMPONENT_TYPE;
+
+typedef enum D3D_TESSELLATOR_DOMAIN { // tessellator domain codes 0..3
+   D3D_TESSELLATOR_DOMAIN_UNDEFINED = 0,
+   D3D_TESSELLATOR_DOMAIN_ISOLINE = 1,
+   D3D_TESSELLATOR_DOMAIN_TRI = 2,
+   D3D_TESSELLATOR_DOMAIN_QUAD = 3,
+   D3D11_TESSELLATOR_DOMAIN_UNDEFINED = 0, // D3D11_* aliases carry the same values as the D3D_* names
+   D3D11_TESSELLATOR_DOMAIN_ISOLINE = 1,
+   D3D11_TESSELLATOR_DOMAIN_TRI = 2,
+   D3D11_TESSELLATOR_DOMAIN_QUAD = 3
+} D3D_TESSELLATOR_DOMAIN;
+
+typedef enum D3D_TESSELLATOR_PARTITIONING { // tessellator partitioning codes 0..4
+   D3D_TESSELLATOR_PARTITIONING_UNDEFINED = 0,
+   D3D_TESSELLATOR_PARTITIONING_INTEGER = 1,
+   D3D_TESSELLATOR_PARTITIONING_POW2 = 2,
+   D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+   D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4,
+   D3D11_TESSELLATOR_PARTITIONING_UNDEFINED = 0, // D3D11_* aliases carry the same values as the D3D_* names
+   D3D11_TESSELLATOR_PARTITIONING_INTEGER = 1,
+   D3D11_TESSELLATOR_PARTITIONING_POW2 = 2,
+   D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+   D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
+} D3D_TESSELLATOR_PARTITIONING;
+
+typedef enum D3D_TESSELLATOR_OUTPUT_PRIMITIVE { // tessellator output primitive codes 0..4
+   D3D_TESSELLATOR_OUTPUT_UNDEFINED = 0,
+   D3D_TESSELLATOR_OUTPUT_POINT = 1,
+   D3D_TESSELLATOR_OUTPUT_LINE = 2,
+   D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+   D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4,
+   D3D11_TESSELLATOR_OUTPUT_UNDEFINED = 0, // D3D11_* aliases carry the same values as the D3D_* names
+   D3D11_TESSELLATOR_OUTPUT_POINT = 1,
+   D3D11_TESSELLATOR_OUTPUT_LINE = 2,
+   D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+   D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
+} D3D_TESSELLATOR_OUTPUT_PRIMITIVE;
+
+typedef enum D3D_NAME { // system-value name codes: 0..16, then 64..68
+   D3D_NAME_UNDEFINED = 0,
+   D3D_NAME_POSITION = 1,
+   D3D_NAME_CLIP_DISTANCE = 2,
+   D3D_NAME_CULL_DISTANCE = 3,
+   D3D_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+   D3D_NAME_VIEWPORT_ARRAY_INDEX = 5,
+   D3D_NAME_VERTEX_ID = 6,
+   D3D_NAME_PRIMITIVE_ID = 7,
+   D3D_NAME_INSTANCE_ID = 8,
+   D3D_NAME_IS_FRONT_FACE = 9,
+   D3D_NAME_SAMPLE_INDEX = 10,
+   D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11,
+   D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12,
+   D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13,
+   D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14,
+   D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15,
+   D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16,
+   D3D_NAME_TARGET = 64, // values 17..63 have no enumerator here
+   D3D_NAME_DEPTH = 65,
+   D3D_NAME_COVERAGE = 66,
+   D3D_NAME_DEPTH_GREATER_EQUAL = 67,
+   D3D_NAME_DEPTH_LESS_EQUAL = 68,
+   D3D10_NAME_UNDEFINED = 0, // from here on: D3D10_*/D3D11_* aliases with the same values as the D3D_NAME_* entries
+   D3D10_NAME_POSITION = 1,
+   D3D10_NAME_CLIP_DISTANCE = 2,
+   D3D10_NAME_CULL_DISTANCE = 3,
+   D3D10_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+   D3D10_NAME_VIEWPORT_ARRAY_INDEX = 5,
+   D3D10_NAME_VERTEX_ID = 6,
+   D3D10_NAME_PRIMITIVE_ID = 7,
+   D3D10_NAME_INSTANCE_ID = 8,
+   D3D10_NAME_IS_FRONT_FACE = 9,
+   D3D10_NAME_SAMPLE_INDEX = 10,
+   D3D11_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11,
+   D3D11_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12,
+   D3D11_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13,
+   D3D11_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14,
+   D3D11_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15,
+   D3D11_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16,
+   D3D10_NAME_TARGET = 64,
+   D3D10_NAME_DEPTH = 65,
+   D3D10_NAME_COVERAGE = 66,
+   D3D11_NAME_DEPTH_GREATER_EQUAL = 67,
+   D3D11_NAME_DEPTH_LESS_EQUAL = 68
+} D3D_NAME;
+
+typedef struct _D3D11_SIGNATURE_PARAMETER_DESC { // NOTE(review): presumably mirrors the D3D11 reflection descriptor of the same name - confirm
+   const char* SemanticName;
+   unsigned int SemanticIndex;
+   unsigned int Register;
+   D3D_NAME SystemValueType; // one of the D3D_NAME codes declared above
+   D3D_REGISTER_COMPONENT_TYPE ComponentType; // one of the component type codes declared above
+   unsigned char Mask; // NOTE(review): presumably a per-component bitmask - confirm
+   unsigned char ReadWriteMask; // NOTE(review): presumably a per-component bitmask - confirm
+   unsigned int Stream;
+} D3D11_SIGNATURE_PARAMETER_DESC;
+
+#include "../../../state_trackers/d3d1x/d3d1xshader/include/sm4.h" // pulled in after the typedefs above; NOTE(review): presumably sm4.h relies on them - confirm
+
+#endif // __NV50_IR_FROM_SM4_H__ |