aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2011-10-18 18:36:57 +0200
committerChristoph Bumiller <[email protected]>2011-10-21 23:00:40 +0200
commitbb0482a55bbebfdd4b06954fbfacf5a57a04b1f3 (patch)
treeb6bdfb9091559354cffbcaba21cc7ab62d1cc44a /src/gallium/drivers
parentd988361ead27ce61615669bd428b04d2aac7af4f (diff)
nv50/ir: import SM4 converter
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h6
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp2326
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h183
4 files changed, 2513 insertions, 4 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
index f9c18fa8d88..53a73b457f4 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -207,7 +207,7 @@ BuildUtil::mkCvt(operation op,
return insn;
}
-Instruction *
+CmpInstruction *
BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
Value *src0, Value *src1, Value *src2)
{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
index dbc6723fe90..0bb853a340b 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -68,9 +68,9 @@ public:
Value *attrRel, Value *primRel);
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
- Instruction *mkCmp(operation, CondCode, DataType,
- Value *,
- Value *, Value *, Value * = NULL);
+ CmpInstruction *mkCmp(operation, CondCode, DataType,
+ Value *,
+ Value *, Value *, Value * = NULL);
Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
Value **def, Value **src);
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp
new file mode 100644
index 00000000000..7ed00e336c6
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp
@@ -0,0 +1,2326 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+#include "nv50_ir_build_util.h"
+
+#include "nv50_ir_from_sm4.h"
+
+// WTF: pass-through is implicit ??? check ReadWriteMask
+
+namespace tgsi {
+
+static nv50_ir::SVSemantic irSemantic(unsigned sn)
+{
+ switch (sn) {
+ case TGSI_SEMANTIC_POSITION: return nv50_ir::SV_POSITION;
+ case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
+ case NV50_SEMANTIC_LAYER: return nv50_ir::SV_LAYER;
+ case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX;
+ case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
+ case NV50_SEMANTIC_CLIPDISTANCE: return nv50_ir::SV_CLIP_DISTANCE;
+ case NV50_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
+ case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
+ case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
+ case NV50_SEMANTIC_TESSFACTOR: return nv50_ir::SV_TESS_FACTOR;
+ case NV50_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
+ default:
+ return nv50_ir::SV_UNDEFINED;
+ }
+}
+
+} // namespace tgsi
+
+namespace {
+
+using namespace nv50_ir;
+
+#define NV50_IR_MAX_RESOURCES 64
+
+class Converter : public BuildUtil
+{
+public:
+ Converter(Program *, struct nv50_ir_prog_info *);
+ ~Converter();
+
+private:
+ DataArray tData32;
+ DataArray tData64;
+ unsigned int nrRegVals;
+
+ DataArray *lData;
+ unsigned int nrArrays;
+ unsigned int arrayVol;
+
+ DataArray oData;
+
+ uint8_t interpMode[PIPE_MAX_SHADER_INPUTS];
+
+ // outputs for each phase
+ struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS];
+
+ int phase;
+ int subPhaseCnt[2];
+ int subPhase;
+ unsigned int phaseStart;
+ unsigned int phaseInstance;
+ unsigned int *phaseInstCnt[2];
+ bool unrollPhase;
+ bool phaseInstanceUsed;
+ int phaseEnded; // (phase + 1) if $phase ended
+
+ bool finalized;
+
+ Value *srcPtr[3][3]; // for indirect addressing, save pointer values
+ Value *dstPtr[3];
+ Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP)
+
+ Value *domainPt[3]; // pre-fetched TessCoord
+
+ unsigned int nDstOpnds;
+
+ Stack condBBs;
+ Stack joinBBs;
+ Stack loopBBs;
+ Stack breakBBs;
+ Stack entryBBs;
+ Stack leaveBBs;
+ Stack retIPs;
+
+ bool shadow[NV50_IR_MAX_RESOURCES];
+ TexTarget resourceType[NV50_IR_MAX_RESOURCES][2];
+
+ struct nv50_ir_prog_info& info;
+
+ Value *fragCoord[4];
+
+public:
+ bool run();
+
+private:
+ bool handleInstruction(unsigned int pos);
+ bool inspectInstruction(unsigned int pos);
+ bool handleDeclaration(const sm4_dcl& dcl);
+ bool inspectDeclaration(const sm4_dcl& dcl);
+ bool parseSignature();
+
+ bool haveNextPhase(unsigned int pos) const;
+
+ void allocateValues();
+ void exportOutputs();
+
+ void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]);
+ void handleLOAD(Value *dst0[4]);
+ void handleSAMPLE(operation, Value *dst0[4]);
+ void handleQUERY(Value *dst0[4], enum TexQuery query);
+ void handleDP(Value *dst0[4], int dim);
+
+ Symbol *iSym(int i, int c);
+ Symbol *oSym(int i, int c);
+
+ Value *src(int i, int c);
+ Value *src(const sm4_op&, int c, int i);
+ Value *dst(int i, int c);
+ Value *dst(const sm4_op&, int c, int i);
+ void saveDst(int i, int c, Value *value);
+ void saveDst(const sm4_op&, int c, Value *value, int i);
+ void saveFragDepth(operation op, Value *value);
+
+ Value *interpolate(const sm4_op&, int c, int i);
+
+ Value *getSrcPtr(int s, int dim, int shl);
+ Value *getDstPtr(int d, int dim, int shl);
+ Value *getVtxPtr(int s);
+
+ bool checkDstSrcAliasing() const;
+ void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
+ void finalizeShader();
+
+ operation cvtOpcode(enum sm4_opcode op) const;
+ unsigned int getDstOpndCount(enum sm4_opcode opcode) const;
+
+ DataType inferSrcType(enum sm4_opcode op) const;
+ DataType inferDstType(enum sm4_opcode op) const;
+
+ unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const;
+ CondCode cvtCondCode(enum sm4_opcode op) const;
+ RoundMode cvtRoundingMode(enum sm4_opcode op) const;
+ TexTarget cvtTexTarget(enum sm4_target,
+ enum sm4_opcode, operation *) const;
+ SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const;
+ uint8_t cvtInterpMode(enum sm4_interpolation) const;
+
+ unsigned tgsiSemantic(SVSemantic, int index);
+ void recordSV(unsigned sn, unsigned si, unsigned mask, bool input);
+
+private:
+ sm4_insn *insn;
+ DataType dTy, sTy;
+
+ const struct sm4_program& sm4;
+ Program *prog;
+};
+
+#define PRIM_CASE(a, b) \
+ case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
+
+unsigned
+Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const
+{
+ switch (prim) {
+ PRIM_CASE(UNDEFINED, POINTS);
+ PRIM_CASE(POINTLIST, POINTS);
+ PRIM_CASE(LINELIST, LINES);
+ PRIM_CASE(LINESTRIP, LINE_STRIP);
+ PRIM_CASE(TRIANGLELIST, TRIANGLES);
+ PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP);
+ PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY);
+ PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY);
+ PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY);
+ PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLES_ADJACENCY);
+ default:
+ if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST ||
+ prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST)
+ return PIPE_PRIM_POINTS;
+ if (patchSize)
+ *patchSize =
+ prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1;
+ return NV50_PRIM_PATCHES;
+ }
+}
+
+#define IPM_CASE(n, a, b) \
+ case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
+
+uint8_t
+Converter::cvtInterpMode(enum sm4_interpolation mode) const
+{
+ switch (mode) {
+ IPM_CASE(CONSTANT, FLAT, FLAT);
+ IPM_CASE(LINEAR, PERSPECTIVE, PERSPECTIVE);
+ IPM_CASE(LINEAR_CENTROID, PERSPECTIVE, CENTROID);
+ IPM_CASE(LINEAR_NOPERSPECTIVE, LINEAR, LINEAR);
+ IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID);
+ IPM_CASE(LINEAR_SAMPLE, PERSPECTIVE, OFFSET);
+ IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE, LINEAR, OFFSET);
+ IPM_CASE(UNDEFINED, LINEAR, LINEAR);
+ default:
+ assert(!"invalid interpolation mode");
+ return 0;
+ }
+}
+
+static void
+setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode)
+{
+ switch (mode & NV50_IR_INTERP_MODE_MASK) {
+ case NV50_IR_INTERP_LINEAR:
+ var->linear = 1;
+ break;
+ case NV50_IR_INTERP_FLAT:
+ var->flat = 1;
+ break;
+ default:
+ break;
+ }
+ if (mode & NV50_IR_INTERP_CENTROID)
+ var->centroid = 1;
+}
+
+RoundMode
+Converter::cvtRoundingMode(enum sm4_opcode op) const
+{
+ switch (op) {
+ case SM4_OPCODE_ROUND_NE: return ROUND_NI;
+ case SM4_OPCODE_ROUND_NI: return ROUND_MI;
+ case SM4_OPCODE_ROUND_PI: return ROUND_PI;
+ case SM4_OPCODE_ROUND_Z: return ROUND_ZI;
+ default:
+ return ROUND_N;
+ }
+}
+
+CondCode
+Converter::cvtCondCode(enum sm4_opcode op) const
+{
+ switch (op) {
+ case SM4_OPCODE_EQ:
+ case SM4_OPCODE_DEQ:
+ case SM4_OPCODE_IEQ: return CC_EQ;
+ case SM4_OPCODE_GE:
+ case SM4_OPCODE_DGE:
+ case SM4_OPCODE_IGE:
+ case SM4_OPCODE_UGE: return CC_GE;
+ case SM4_OPCODE_LT:
+ case SM4_OPCODE_DLT:
+ case SM4_OPCODE_ILT:
+ case SM4_OPCODE_ULT: return CC_LT;
+ case SM4_OPCODE_NE:
+ case SM4_OPCODE_INE:
+ case SM4_OPCODE_DNE: return CC_NEU;
+ default:
+ return CC_ALWAYS;
+ }
+}
+
+DataType
+Converter::inferSrcType(enum sm4_opcode op) const
+{
+ switch (op) {
+ case SM4_OPCODE_IADD:
+ case SM4_OPCODE_IEQ:
+ case SM4_OPCODE_IGE:
+ case SM4_OPCODE_ILT:
+ case SM4_OPCODE_IMAD:
+ case SM4_OPCODE_IMAX:
+ case SM4_OPCODE_IMIN:
+ case SM4_OPCODE_IMUL:
+ case SM4_OPCODE_INE:
+ case SM4_OPCODE_INEG:
+ case SM4_OPCODE_ISHL:
+ case SM4_OPCODE_ISHR:
+ case SM4_OPCODE_ITOF:
+ case SM4_OPCODE_ATOMIC_IADD:
+ case SM4_OPCODE_ATOMIC_IMAX:
+ case SM4_OPCODE_ATOMIC_IMIN:
+ return TYPE_S32;
+ case SM4_OPCODE_AND:
+ case SM4_OPCODE_NOT:
+ case SM4_OPCODE_OR:
+ case SM4_OPCODE_UDIV:
+ case SM4_OPCODE_ULT:
+ case SM4_OPCODE_UGE:
+ case SM4_OPCODE_UMUL:
+ case SM4_OPCODE_UMAD:
+ case SM4_OPCODE_UMAX:
+ case SM4_OPCODE_UMIN:
+ case SM4_OPCODE_USHR:
+ case SM4_OPCODE_UTOF:
+ case SM4_OPCODE_XOR:
+ case SM4_OPCODE_UADDC:
+ case SM4_OPCODE_USUBB:
+ case SM4_OPCODE_ATOMIC_AND:
+ case SM4_OPCODE_ATOMIC_OR:
+ case SM4_OPCODE_ATOMIC_XOR:
+ case SM4_OPCODE_ATOMIC_UMAX:
+ case SM4_OPCODE_ATOMIC_UMIN:
+ return TYPE_U32;
+ case SM4_OPCODE_DADD:
+ case SM4_OPCODE_DMAX:
+ case SM4_OPCODE_DMIN:
+ case SM4_OPCODE_DMUL:
+ case SM4_OPCODE_DEQ:
+ case SM4_OPCODE_DGE:
+ case SM4_OPCODE_DLT:
+ case SM4_OPCODE_DNE:
+ case SM4_OPCODE_DMOV:
+ case SM4_OPCODE_DMOVC:
+ case SM4_OPCODE_DTOF:
+ return TYPE_F64;
+ case SM4_OPCODE_F16TOF32:
+ return TYPE_F16;
+ default:
+ return TYPE_F32;
+ }
+}
+
+DataType
+Converter::inferDstType(enum sm4_opcode op) const
+{
+ switch (op) {
+ case SM4_OPCODE_FTOI:
+ return TYPE_S32;
+ case SM4_OPCODE_FTOU:
+ case SM4_OPCODE_EQ:
+ case SM4_OPCODE_GE:
+ case SM4_OPCODE_LT:
+ case SM4_OPCODE_NE:
+ return TYPE_U32;
+ case SM4_OPCODE_FTOD:
+ return TYPE_F64;
+ case SM4_OPCODE_F32TOF16:
+ return TYPE_F16;
+ case SM4_OPCODE_ITOF:
+ case SM4_OPCODE_UTOF:
+ case SM4_OPCODE_DTOF:
+ return TYPE_F32;
+ default:
+ return inferSrcType(op);
+ }
+}
+
+operation
+Converter::cvtOpcode(enum sm4_opcode op) const
+{
+ switch (op) {
+ case SM4_OPCODE_ADD: return OP_ADD;
+ case SM4_OPCODE_AND: return OP_AND;
+ case SM4_OPCODE_BREAK: return OP_BREAK;
+ case SM4_OPCODE_BREAKC: return OP_BREAK;
+ case SM4_OPCODE_CALL: return OP_CALL;
+ case SM4_OPCODE_CALLC: return OP_CALL;
+ case SM4_OPCODE_CASE: return OP_NOP;
+ case SM4_OPCODE_CONTINUE: return OP_CONT;
+ case SM4_OPCODE_CONTINUEC: return OP_CONT;
+ case SM4_OPCODE_CUT: return OP_RESTART;
+ case SM4_OPCODE_DEFAULT: return OP_NOP;
+ case SM4_OPCODE_DERIV_RTX: return OP_DFDX;
+ case SM4_OPCODE_DERIV_RTY: return OP_DFDY;
+ case SM4_OPCODE_DISCARD: return OP_DISCARD;
+ case SM4_OPCODE_DIV: return OP_DIV;
+ case SM4_OPCODE_DP2: return OP_MAD;
+ case SM4_OPCODE_DP3: return OP_MAD;
+ case SM4_OPCODE_DP4: return OP_MAD;
+ case SM4_OPCODE_ELSE: return OP_BRA;
+ case SM4_OPCODE_EMIT: return OP_EMIT;
+ case SM4_OPCODE_EMITTHENCUT: return OP_EMIT;
+ case SM4_OPCODE_ENDIF: return OP_BRA;
+ case SM4_OPCODE_ENDLOOP: return OP_PREBREAK;
+ case SM4_OPCODE_ENDSWITCH: return OP_NOP;
+ case SM4_OPCODE_EQ: return OP_SET;
+ case SM4_OPCODE_EXP: return OP_EX2;
+ case SM4_OPCODE_FRC: return OP_CVT;
+ case SM4_OPCODE_FTOI: return OP_CVT;
+ case SM4_OPCODE_FTOU: return OP_CVT;
+ case SM4_OPCODE_GE: return OP_SET;
+ case SM4_OPCODE_IADD: return OP_ADD;
+ case SM4_OPCODE_IF: return OP_BRA;
+ case SM4_OPCODE_IEQ: return OP_SET;
+ case SM4_OPCODE_IGE: return OP_SET;
+ case SM4_OPCODE_ILT: return OP_SET;
+ case SM4_OPCODE_IMAD: return OP_MAD;
+ case SM4_OPCODE_IMAX: return OP_MAX;
+ case SM4_OPCODE_IMIN: return OP_MIN;
+ case SM4_OPCODE_IMUL: return OP_MUL;
+ case SM4_OPCODE_INE: return OP_SET;
+ case SM4_OPCODE_INEG: return OP_NEG;
+ case SM4_OPCODE_ISHL: return OP_SHL;
+ case SM4_OPCODE_ISHR: return OP_SHR;
+ case SM4_OPCODE_ITOF: return OP_CVT;
+ case SM4_OPCODE_LD: return OP_TXF;
+ case SM4_OPCODE_LD_MS: return OP_TXF;
+ case SM4_OPCODE_LOG: return OP_LG2;
+ case SM4_OPCODE_LOOP: return OP_PRECONT;
+ case SM4_OPCODE_LT: return OP_SET;
+ case SM4_OPCODE_MAD: return OP_MAD;
+ case SM4_OPCODE_MIN: return OP_MIN;
+ case SM4_OPCODE_MAX: return OP_MAX;
+ case SM4_OPCODE_MOV: return OP_MOV;
+ case SM4_OPCODE_MOVC: return OP_MOV;
+ case SM4_OPCODE_MUL: return OP_MUL;
+ case SM4_OPCODE_NE: return OP_SET;
+ case SM4_OPCODE_NOP: return OP_NOP;
+ case SM4_OPCODE_NOT: return OP_NOT;
+ case SM4_OPCODE_OR: return OP_OR;
+ case SM4_OPCODE_RESINFO: return OP_TXQ;
+ case SM4_OPCODE_RET: return OP_RET;
+ case SM4_OPCODE_RETC: return OP_RET;
+ case SM4_OPCODE_ROUND_NE: return OP_CVT;
+ case SM4_OPCODE_ROUND_NI: return OP_FLOOR;
+ case SM4_OPCODE_ROUND_PI: return OP_CEIL;
+ case SM4_OPCODE_ROUND_Z: return OP_TRUNC;
+ case SM4_OPCODE_RSQ: return OP_RSQ;
+ case SM4_OPCODE_SAMPLE: return OP_TEX;
+ case SM4_OPCODE_SAMPLE_C: return OP_TEX;
+ case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX;
+ case SM4_OPCODE_SAMPLE_L: return OP_TXL;
+ case SM4_OPCODE_SAMPLE_D: return OP_TXD;
+ case SM4_OPCODE_SAMPLE_B: return OP_TXB;
+ case SM4_OPCODE_SQRT: return OP_SQRT;
+ case SM4_OPCODE_SWITCH: return OP_NOP;
+ case SM4_OPCODE_SINCOS: return OP_PRESIN;
+ case SM4_OPCODE_UDIV: return OP_DIV;
+ case SM4_OPCODE_ULT: return OP_SET;
+ case SM4_OPCODE_UGE: return OP_SET;
+ case SM4_OPCODE_UMUL: return OP_MUL;
+ case SM4_OPCODE_UMAD: return OP_MAD;
+ case SM4_OPCODE_UMAX: return OP_MAX;
+ case SM4_OPCODE_UMIN: return OP_MIN;
+ case SM4_OPCODE_USHR: return OP_SHR;
+ case SM4_OPCODE_UTOF: return OP_CVT;
+ case SM4_OPCODE_XOR: return OP_XOR;
+
+ case SM4_OPCODE_GATHER4: return OP_TXG;
+ case SM4_OPCODE_SAMPLE_POS: return OP_PIXLD;
+ case SM4_OPCODE_SAMPLE_INFO: return OP_PIXLD;
+ case SM4_OPCODE_EMIT_STREAM: return OP_EMIT;
+ case SM4_OPCODE_CUT_STREAM: return OP_RESTART;
+ case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT;
+ case SM4_OPCODE_INTERFACE_CALL: return OP_CALL;
+ case SM4_OPCODE_BUFINFO: return OP_TXQ;
+ case SM4_OPCODE_DERIV_RTX_COARSE: return OP_DFDX;
+ case SM4_OPCODE_DERIV_RTX_FINE: return OP_DFDX;
+ case SM4_OPCODE_DERIV_RTY_COARSE: return OP_DFDY;
+ case SM4_OPCODE_DERIV_RTY_FINE: return OP_DFDY;
+ case SM4_OPCODE_GATHER4_C: return OP_TXG;
+ case SM4_OPCODE_GATHER4_PO: return OP_TXG;
+ case SM4_OPCODE_GATHER4_PO_C: return OP_TXG;
+
+ case SM4_OPCODE_RCP: return OP_RCP;
+ case SM4_OPCODE_F32TOF16: return OP_CVT;
+ case SM4_OPCODE_F16TOF32: return OP_CVT;
+ case SM4_OPCODE_UADDC: return OP_ADD;
+ case SM4_OPCODE_USUBB: return OP_SUB;
+ case SM4_OPCODE_COUNTBITS: return OP_POPCNT;
+
+ case SM4_OPCODE_ATOMIC_AND: return OP_AND;
+ case SM4_OPCODE_ATOMIC_OR: return OP_OR;
+ case SM4_OPCODE_ATOMIC_XOR: return OP_XOR;
+ case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE;
+ case SM4_OPCODE_ATOMIC_IADD: return OP_ADD;
+ case SM4_OPCODE_ATOMIC_IMAX: return OP_MAX;
+ case SM4_OPCODE_ATOMIC_IMIN: return OP_MIN;
+ case SM4_OPCODE_ATOMIC_UMAX: return OP_MAX;
+ case SM4_OPCODE_ATOMIC_UMIN: return OP_MIN;
+
+ case SM4_OPCODE_SYNC: return OP_MEMBAR;
+ case SM4_OPCODE_DADD: return OP_ADD;
+ case SM4_OPCODE_DMAX: return OP_MAX;
+ case SM4_OPCODE_DMIN: return OP_MIN;
+ case SM4_OPCODE_DMUL: return OP_MUL;
+ case SM4_OPCODE_DEQ: return OP_SET;
+ case SM4_OPCODE_DGE: return OP_SET;
+ case SM4_OPCODE_DLT: return OP_SET;
+ case SM4_OPCODE_DNE: return OP_SET;
+ case SM4_OPCODE_DMOV: return OP_MOV;
+ case SM4_OPCODE_DMOVC: return OP_MOV;
+ case SM4_OPCODE_DTOF: return OP_CVT;
+ case SM4_OPCODE_FTOD: return OP_CVT;
+
+ default:
+ return OP_NOP;
+ }
+}
+
+unsigned int
+Converter::getDstOpndCount(enum sm4_opcode opcode) const
+{
+ switch (opcode) {
+ case SM4_OPCODE_SINCOS:
+ case SM4_OPCODE_UDIV:
+ case SM4_OPCODE_IMUL:
+ case SM4_OPCODE_UMUL:
+ return 2;
+ case SM4_OPCODE_BREAK:
+ case SM4_OPCODE_BREAKC:
+ case SM4_OPCODE_CALL:
+ case SM4_OPCODE_CALLC:
+ case SM4_OPCODE_CONTINUE:
+ case SM4_OPCODE_CONTINUEC:
+ case SM4_OPCODE_DISCARD:
+ case SM4_OPCODE_EMIT:
+ case SM4_OPCODE_EMIT_STREAM:
+ case SM4_OPCODE_CUT:
+ case SM4_OPCODE_CUT_STREAM:
+ case SM4_OPCODE_EMITTHENCUT:
+ case SM4_OPCODE_EMITTHENCUT_STREAM:
+ case SM4_OPCODE_IF:
+ case SM4_OPCODE_ELSE:
+ case SM4_OPCODE_ENDIF:
+ case SM4_OPCODE_LOOP:
+ case SM4_OPCODE_ENDLOOP:
+ case SM4_OPCODE_RET:
+ case SM4_OPCODE_RETC:
+ case SM4_OPCODE_SYNC:
+ case SM4_OPCODE_SWITCH:
+ case SM4_OPCODE_CASE:
+ case SM4_OPCODE_HS_DECLS:
+ case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
+ case SM4_OPCODE_HS_FORK_PHASE:
+ case SM4_OPCODE_HS_JOIN_PHASE:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+#define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
+#define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
+ return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
+
+TexTarget
+Converter::cvtTexTarget(enum sm4_target targ,
+ enum sm4_opcode op, operation *opr) const
+{
+ bool dc = (op == SM4_OPCODE_SAMPLE_C ||
+ op == SM4_OPCODE_SAMPLE_C_LZ ||
+ op == SM4_OPCODE_GATHER4_C ||
+ op == SM4_OPCODE_GATHER4_PO_C);
+
+ if (opr) {
+ switch (targ) {
+ case SM4_TARGET_RAW_BUFFER: *opr = OP_LOAD; break;
+ case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break;
+ default:
+ *opr = OP_TEX;
+ break;
+ }
+ }
+
+ switch (targ) {
+ TARG_CASE_1(UNKNOWN, 2D);
+ TARG_CASE_2(TEXTURE1D, 1D);
+ TARG_CASE_2(TEXTURE2D, 2D);
+ TARG_CASE_1(TEXTURE2DMS, 2D_MS);
+ TARG_CASE_1(TEXTURE3D, 3D);
+ TARG_CASE_2(TEXTURECUBE, CUBE);
+ TARG_CASE_2(TEXTURE1DARRAY, 1D_ARRAY);
+ TARG_CASE_2(TEXTURE2DARRAY, 2D_ARRAY);
+ TARG_CASE_1(TEXTURE2DMSARRAY, 2D_MS_ARRAY);
+ TARG_CASE_2(TEXTURECUBEARRAY, CUBE_ARRAY);
+ TARG_CASE_1(BUFFER, BUFFER);
+ TARG_CASE_1(RAW_BUFFER, BUFFER);
+ TARG_CASE_1(STRUCTURED_BUFFER, BUFFER);
+ default:
+ assert(!"invalid SM4 texture target");
+ return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D;
+ }
+}
+
+static inline uint32_t
+getSVIndex(enum sm4_sv sv)
+{
+ switch (sv) {
+ case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0;
+ case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1;
+ case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2;
+ case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3;
+
+ case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4;
+ case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5;
+
+ case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0;
+ case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1;
+ case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2;
+
+ case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4;
+
+ case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0;
+
+ case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4;
+
+ default:
+ return 0;
+ }
+}
+
+SVSemantic
+Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const
+{
+ idx = 0;
+
+ switch (sv) {
+ case SM4_SV_UNDEFINED: return SV_UNDEFINED;
+ case SM4_SV_POSITION: return SV_POSITION;
+ case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE;
+ case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction
+ case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER;
+ case SM4_SV_VIEWPORT_ARRAY_INDEX: return SV_VIEWPORT_INDEX;
+ case SM4_SV_VERTEX_ID: return SV_VERTEX_ID;
+ case SM4_SV_PRIMITIVE_ID: return SV_PRIMITIVE_ID;
+ case SM4_SV_INSTANCE_ID: return SV_INSTANCE_ID;
+ case SM4_SV_IS_FRONT_FACE: return SV_FACE;
+ case SM4_SV_SAMPLE_INDEX: return SV_SAMPLE_INDEX;
+
+ case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
+ case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
+ case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
+ case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
+ case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
+ case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
+ idx = getSVIndex(sv);
+ return SV_TESS_FACTOR;
+
+ default:
+ assert(!"invalid SM4 system value");
+ return SV_UNDEFINED;
+ }
+}
+
+unsigned
+Converter::tgsiSemantic(SVSemantic sv, int index)
+{
+ switch (sv) {
+ case SV_POSITION: return TGSI_SEMANTIC_POSITION;
+ case SV_FACE: return TGSI_SEMANTIC_FACE;
+ case SV_LAYER: return NV50_SEMANTIC_LAYER;
+ case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX;
+ case SV_POINT_SIZE: return TGSI_SEMANTIC_PSIZE;
+ case SV_CLIP_DISTANCE: return NV50_SEMANTIC_CLIPDISTANCE;
+ case SV_VERTEX_ID: return NV50_SEMANTIC_VERTEXID;
+ case SV_INSTANCE_ID: return TGSI_SEMANTIC_INSTANCEID;
+ case SV_PRIMITIVE_ID: return TGSI_SEMANTIC_PRIMID;
+ case SV_TESS_FACTOR: return NV50_SEMANTIC_TESSFACTOR;
+ case SV_TESS_COORD: return NV50_SEMANTIC_TESSCOORD;
+ case SV_INVOCATION_ID: return NV50_SEMANTIC_INVOCATIONID;
+ default:
+ return TGSI_SEMANTIC_GENERIC;
+ }
+}
+
+void
+Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input)
+{
+ unsigned int i;
+ for (i = 0; i < info.numSysVals; ++i)
+ if (info.sv[i].sn == sn &&
+ info.sv[i].si == si)
+ return;
+ info.numSysVals = i + 1;
+ info.sv[i].sn = sn;
+ info.sv[i].si = si;
+ info.sv[i].mask = mask;
+ info.sv[i].input = input ? 1 : 0;
+}
+
+bool
+Converter::parseSignature()
+{
+ struct nv50_ir_varying *patch;
+ unsigned int i, r, n;
+
+ info.numInputs = 0;
+ info.numOutputs = 0;
+ info.numPatchConstants = 0;
+
+ for (n = 0, i = 0; i < sm4.num_params_in; ++i) {
+ r = sm4.params_in[i].Register;
+
+ info.in[r].mask |= sm4.params_in[i].ReadWriteMask;
+ // mask might be uninitialized ...
+ if (!sm4.params_in[i].ReadWriteMask)
+ info.in[r].mask = 0xf;
+ info.in[r].id = r;
+ if (info.in[r].regular) // already assigned semantic name/index
+ continue;
+ info.in[r].regular = 1;
+ info.in[r].patch = 0;
+
+ info.numInputs = MAX2(info.numInputs, r + 1);
+
+ switch (sm4.params_in[i].SystemValueType) {
+ case D3D_NAME_UNDEFINED:
+ info.in[r].sn = TGSI_SEMANTIC_GENERIC;
+ info.in[r].si = n++;
+ break;
+ case D3D_NAME_POSITION:
+ info.in[r].sn = TGSI_SEMANTIC_POSITION;
+ break;
+ case D3D_NAME_VERTEX_ID:
+ info.in[r].sn = NV50_SEMANTIC_VERTEXID;
+ break;
+ case D3D_NAME_PRIMITIVE_ID:
+ info.in[r].sn = TGSI_SEMANTIC_PRIMID;
+ // no corresponding output
+ recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
+ break;
+ case D3D_NAME_INSTANCE_ID:
+ info.in[r].sn = TGSI_SEMANTIC_INSTANCEID;
+ break;
+ case D3D_NAME_IS_FRONT_FACE:
+ info.in[r].sn = TGSI_SEMANTIC_FACE;
+ // no corresponding output
+ recordSV(TGSI_SEMANTIC_FACE, 0, 1, true);
+ break;
+ default:
+ assert(!"invalid/unsupported input linkage semantic");
+ break;
+ }
+ }
+
+ for (n = 0, i = 0; i < sm4.num_params_out; ++i) {
+ r = sm4.params_out[i].Register;
+
+ info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask;
+ info.out[r].id = r;
+ if (info.out[r].regular) // already assigned semantic name/index
+ continue;
+ info.out[r].regular = 1;
+ info.out[r].patch = 0;
+
+ info.numOutputs = MAX2(info.numOutputs, r + 1);
+
+ switch (sm4.params_out[i].SystemValueType) {
+ case D3D_NAME_UNDEFINED:
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ info.out[r].sn = TGSI_SEMANTIC_COLOR;
+ info.out[r].si = info.prop.fp.numColourResults++;
+ } else {
+ info.out[r].sn = TGSI_SEMANTIC_GENERIC;
+ info.out[r].si = n++;
+ }
+ break;
+ case D3D_NAME_POSITION:
+ case D3D_NAME_DEPTH:
+ case D3D_NAME_DEPTH_GREATER_EQUAL:
+ case D3D_NAME_DEPTH_LESS_EQUAL:
+ info.out[r].sn = TGSI_SEMANTIC_POSITION;
+ info.io.fragDepth = r;
+ break;
+ case D3D_NAME_CULL_DISTANCE:
+ case D3D_NAME_CLIP_DISTANCE:
+ info.out[r].sn = NV50_SEMANTIC_CLIPDISTANCE;
+ info.out[r].si = sm4.params_out[i].SemanticIndex;
+ break;
+ case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
+ info.out[r].sn = NV50_SEMANTIC_LAYER;
+ break;
+ case D3D_NAME_VIEWPORT_ARRAY_INDEX:
+ info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX;
+ break;
+ case D3D_NAME_PRIMITIVE_ID:
+ info.out[r].sn = TGSI_SEMANTIC_PRIMID;
+ break;
+ case D3D_NAME_TARGET:
+ info.out[r].sn = TGSI_SEMANTIC_COLOR;
+ info.out[r].si = sm4.params_out[i].SemanticIndex;
+ break;
+ case D3D_NAME_COVERAGE:
+ info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK;
+ info.io.sampleMask = r;
+ break;
+ case D3D_NAME_SAMPLE_INDEX:
+ default:
+ assert(!"invalid/unsupported output linkage semantic");
+ break;
+ }
+ }
+
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+ patch = &info.in[info.numInputs];
+ else
+ patch = &info.out[info.numOutputs];
+
+ for (n = 0, i = 0; i < sm4.num_params_patch; ++i) {
+ r = sm4.params_patch[i].Register;
+
+ patch[r].mask |= sm4.params_patch[i].Mask;
+ patch[r].id = r;
+ if (patch[r].regular) // already visited
+ continue;
+ patch[r].regular = 1;
+ patch[r].patch = 1;
+
+ info.numPatchConstants = MAX2(info.numPatchConstants, r + 1);
+
+ switch (sm4.params_patch[i].SystemValueType) {
+ case D3D_NAME_UNDEFINED:
+ patch[r].sn = TGSI_SEMANTIC_GENERIC;
+ patch[r].si = n++;
+ break;
+ case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
+ case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
+ case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+ patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
+ patch[r].si = sm4.params_patch[i].SemanticIndex;
+ break;
+ case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
+ case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+ case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+ patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
+ patch[r].si = sm4.params_patch[i].SemanticIndex + 4;
+ break;
+ default:
+ assert(!"invalid patch-constant linkage semantic");
+ break;
+ }
+ }
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+ info.numInputs += info.numPatchConstants;
+ else
+ info.numOutputs += info.numPatchConstants;
+
+ return true;
+}
+
+bool
+Converter::inspectDeclaration(const sm4_dcl& dcl)
+{
+ int idx = -1;
+ enum sm4_interpolation ipa_mode;
+
+ if (dcl.op.get() && dcl.op->is_index_simple(0))
+ idx = dcl.op->indices[0].disp;
+
+ switch (dcl.opcode) {
+ case SM4_OPCODE_DCL_SAMPLER:
+ assert(idx >= 0);
+ shadow[idx] = dcl.dcl_sampler.shadow;
+ break;
+ case SM4_OPCODE_DCL_RESOURCE:
+ {
+ enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target;
+
+ assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES);
+ resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL);
+ resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL);
+ }
+ break;
+ case SM4_OPCODE_DCL_CONSTANT_BUFFER:
+ // nothing to do
+ break;
+ case SM4_OPCODE_CUSTOMDATA:
+ info.immd.bufSize = dcl.num * 4;
+ info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize);
+ memcpy(info.immd.buf, dcl.data, info.immd.bufSize);
+ break;
+ case SM4_OPCODE_DCL_INDEX_RANGE:
+ // XXX: ?
+ break;
+ case SM4_OPCODE_DCL_INPUT_PS_SGV:
+ case SM4_OPCODE_DCL_INPUT_PS_SIV:
+ case SM4_OPCODE_DCL_INPUT_PS:
+ {
+ assert(idx >= 0 && idx < info.numInputs);
+ ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation;
+ interpMode[idx] = cvtInterpMode(ipa_mode);
+ setVaryingInterpMode(&info.in[idx], interpMode[idx]);
+ }
+ break;
+ case SM4_OPCODE_DCL_INPUT_SGV:
+ case SM4_OPCODE_DCL_INPUT_SIV:
+ case SM4_OPCODE_DCL_INPUT:
+ if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) {
+ idx = info.numInputs++;
+ info.in[idx].sn = NV50_SEMANTIC_TESSCOORD;
+ info.in[idx].mask = dcl.op->mask;
+ }
+ // rest handled in parseSignature
+ break;
+ case SM4_OPCODE_DCL_OUTPUT_SGV:
+ case SM4_OPCODE_DCL_OUTPUT_SIV:
+ switch (dcl.sv) {
+ case SM4_SV_POSITION:
+ assert(prog->getType() != Program::TYPE_FRAGMENT);
+ break;
+ case SM4_SV_CULL_DISTANCE: // XXX: order ?
+ info.io.cullDistanceMask |= 1 << info.io.clipDistanceCount;
+ // fall through
+ case SM4_SV_CLIP_DISTANCE:
+ info.io.clipDistanceCount++;
+ break;
+ default:
+ break;
+ }
+ switch (dcl.op->file) {
+ case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
+ case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
+ case SM4_FILE_OUTPUT_DEPTH:
+ if (info.io.fragDepth < 0xff)
+ break;
+ idx = info.io.fragDepth = info.numOutputs++;
+ info.out[idx].sn = TGSI_SEMANTIC_POSITION;
+ break;
+ case SM4_FILE_OUTPUT_COVERAGE_MASK:
+ if (info.io.sampleMask < 0xff)
+ break;
+ idx = info.io.sampleMask = info.numOutputs++;
+ info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK;
+ break;
+ default:
+ break;
+ }
+ break;
+ case SM4_OPCODE_DCL_OUTPUT:
+ // handled in parseSignature
+ break;
+ case SM4_OPCODE_DCL_TEMPS:
+ nrRegVals += dcl.num;
+ break;
+ case SM4_OPCODE_DCL_INDEXABLE_TEMP:
+ nrArrays++;
+ break;
+ case SM4_OPCODE_DCL_GLOBAL_FLAGS:
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil;
+ break;
+
+ case SM4_OPCODE_DCL_FUNCTION_BODY:
+ break;
+ case SM4_OPCODE_DCL_FUNCTION_TABLE:
+ break;
+ case SM4_OPCODE_DCL_INTERFACE:
+ break;
+
+ // GP
+ case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+ info.prop.gp.outputPrim = g3dPrim(
+ dcl.dcl_gs_output_primitive_topology.primitive_topology);
+ break;
+ case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
+ info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive);
+ break;
+ case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+ info.prop.gp.maxVertices = dcl.num;
+ break;
+ case SM4_OPCODE_DCL_GS_INSTANCE_COUNT:
+ info.prop.gp.instanceCount = dcl.num;
+ break;
+ case SM4_OPCODE_DCL_STREAM:
+ break;
+
+ // TCP/TEP
+ case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
+ info.prop.tp.inputPatchSize =
+ dcl.dcl_input_control_point_count.control_points;
+ break;
+ case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
+ info.prop.tp.outputPatchSize =
+ dcl.dcl_output_control_point_count.control_points;
+ break;
+ case SM4_OPCODE_DCL_TESS_DOMAIN:
+ switch (dcl.dcl_tess_domain.domain) {
+ case D3D_TESSELLATOR_DOMAIN_ISOLINE:
+ info.prop.tp.domain = PIPE_PRIM_LINES;
+ break;
+ case D3D_TESSELLATOR_DOMAIN_TRI:
+ info.prop.tp.domain = PIPE_PRIM_TRIANGLES;
+ break;
+ case D3D_TESSELLATOR_DOMAIN_QUAD:
+ info.prop.tp.domain = PIPE_PRIM_QUADS;
+ break;
+ case D3D_TESSELLATOR_DOMAIN_UNDEFINED:
+ default:
+ info.prop.tp.domain = PIPE_PRIM_MAX;
+ break;
+ }
+ break;
+ case SM4_OPCODE_DCL_TESS_PARTITIONING:
+ switch (dcl.dcl_tess_partitioning.partitioning) {
+ case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
+ info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD;
+ break;
+ case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
+ info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN;
+ break;
+ case D3D_TESSELLATOR_PARTITIONING_POW2:
+ info.prop.tp.partitioning = NV50_TESS_PART_POW2;
+ break;
+ case D3D_TESSELLATOR_PARTITIONING_INTEGER:
+ case D3D_TESSELLATOR_PARTITIONING_UNDEFINED:
+ default:
+ info.prop.tp.partitioning = NV50_TESS_PART_INTEGER;
+ break;
+ }
+ break;
+ case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
+ switch (dcl.dcl_tess_output_primitive.primitive) {
+ case D3D_TESSELLATOR_OUTPUT_LINE:
+ info.prop.tp.outputPrim = PIPE_PRIM_LINES;
+ break;
+ case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
+ info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
+ info.prop.tp.winding = +1;
+ break;
+ case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
+ info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
+ info.prop.tp.winding = -1;
+ break;
+ case D3D_TESSELLATOR_OUTPUT_POINT:
+ info.prop.tp.outputPrim = PIPE_PRIM_POINTS;
+ break;
+ case D3D_TESSELLATOR_OUTPUT_UNDEFINED:
+ default:
+ info.prop.tp.outputPrim = PIPE_PRIM_MAX;
+ break;
+ }
+ break;
+
+ case SM4_OPCODE_HS_FORK_PHASE:
+ ++subPhaseCnt[0];
+ phase = 1;
+ break;
+ case SM4_OPCODE_HS_JOIN_PHASE:
+ phase = 2;
+ ++subPhaseCnt[1];
+ break;
+ case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+ case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+ case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR:
+ break;
+
+ // weird stuff
+ case SM4_OPCODE_DCL_THREAD_GROUP:
+ case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+ case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
+ case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
+ case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+ case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
+ case SM4_OPCODE_DCL_RESOURCE_RAW:
+ case SM4_OPCODE_DCL_RESOURCE_STRUCTURED:
+ ERROR("unhandled declaration\n");
+ abort();
+ return false;
+
+ default:
+ assert(!"invalid SM4 declaration");
+ return false;
+ }
+ return true;
+}
+
+void
+Converter::allocateValues()
+{
+ lData = new DataArray[nrArrays];
+
+ for (unsigned int i = 0; i < nrArrays; ++i)
+ lData[i].setParent(this);
+
+ tData32.setup(0, nrRegVals, 4, 4, FILE_GPR);
+ tData64.setup(0, nrRegVals, 2, 8, FILE_GPR);
+
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ oData.setup(0, info.numOutputs, 4, 4, FILE_GPR);
+}
+
+bool Converter::handleDeclaration(const sm4_dcl& dcl)
+{
+ switch (dcl.opcode) {
+ case SM4_OPCODE_DCL_INDEXABLE_TEMP:
+ lData[nrArrays++].setup(arrayVol,
+ dcl.indexable_temp.num, dcl.indexable_temp.comps,
+ 4, FILE_MEMORY_LOCAL);
+ arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4;
+ break;
+ case SM4_OPCODE_HS_FORK_PHASE:
+ if (subPhaseCnt[0])
+ phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1];
+ ++subPhaseCnt[0];
+ break;
+ case SM4_OPCODE_HS_JOIN_PHASE:
+ if (subPhaseCnt[1])
+ phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1];
+ ++subPhaseCnt[1];
+ break;
+ case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+ phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num;
+ break;
+ case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+ phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num;
+ break;
+
+ default:
+ break; // already handled in inspection
+ }
+
+ return true;
+}
+
+Symbol *
+Converter::iSym(int i, int c)
+{
+ if (info.in[i].regular) {
+ return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4);
+ } else {
+ return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si);
+ }
+}
+
+Symbol *
+Converter::oSym(int i, int c)
+{
+ if (info.out[i].regular) {
+ return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4);
+ } else {
+ return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si);
+ }
+}
+
+Value *
+Converter::getSrcPtr(int s, int dim, int shl)
+{
+ if (srcPtr[s][dim])
+ return srcPtr[s][dim];
+
+ sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get();
+
+ if (!op)
+ return NULL;
+
+ Value *index = src(*op, 0, s);
+
+ srcPtr[s][dim] = index;
+ if (shl)
+ srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
+ return srcPtr[s][dim];
+}
+
+Value *
+Converter::getDstPtr(int d, int dim, int shl)
+{
+ assert(d == 0);
+ if (dstPtr[dim])
+ return dstPtr[dim];
+
+ sm4_op *op = insn->ops[d]->indices[dim].reg.get();
+ if (!op)
+ return NULL;
+
+ Value *index = src(*op, 0, d);
+ if (shl)
+ index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
+
+ return (dstPtr[dim] = index);
+}
+
+Value *
+Converter::getVtxPtr(int s)
+{
+ assert(s < 3);
+ if (vtxBase[s])
+ return vtxBase[s];
+
+ sm4_op *op = insn->ops[s + nDstOpnds].get();
+ if (!op)
+ return NULL;
+ int idx = op->indices[0].disp;
+
+ vtxBase[s] = getSrcPtr(s, 0, 0);
+ vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]);
+ return vtxBase[s];
+}
+
+Value *
+Converter::src(int i, int c)
+{
+ return src(*insn->ops[i + nDstOpnds], c, i);
+}
+
+Value *
+Converter::dst(int i, int c)
+{
+ return dst(*insn->ops[i], c, i);
+}
+
+void
+Converter::saveDst(int i, int c, Value *value)
+{
+ if (insn->insn.sat)
+ mkOp1(OP_SAT, dTy, value, value);
+ return saveDst(*insn->ops[i], c, value, i);
+}
+
+Value *
+Converter::interpolate(const sm4_op& op, int c, int i)
+{
+ int idx = op.indices[0].disp;
+ int swz = op.swizzle[c];
+ operation opr =
+ (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP;
+
+ Value *ptr = getSrcPtr(i, 0, 4);
+
+ Instruction *insn = new_Instruction(func, opr, TYPE_F32);
+
+ insn->setDef(0, getScratch());
+ insn->setSrc(0, iSym(idx, swz));
+ if (opr == OP_PINTERP)
+ insn->setSrc(1, fragCoord[3]);
+ if (ptr)
+ insn->setIndirect(0, 0, ptr);
+
+ insn->setInterpolate(interpMode[idx]);
+
+ bb->insertTail(insn);
+ return insn->getDef(0);
+}
+
+Value *
+Converter::src(const sm4_op& op, int c, int s)
+{
+ const int size = typeSizeof(sTy);
+
+ Instruction *ld;
+ Value *res, *ptr, *vtx;
+ int idx, dim, off;
+ const int swz = op.swizzle[c];
+
+ switch (op.file) {
+ case SM4_FILE_IMMEDIATE32:
+ res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32);
+ break;
+ case SM4_FILE_IMMEDIATE64:
+ assert(c < 2);
+ res = loadImm(NULL, op.imm_values[swz].u64);
+ break;
+ case SM4_FILE_TEMP:
+ assert(op.is_index_simple(0));
+ idx = op.indices[0].disp;
+ if (size == 8)
+ res = tData64.load(idx, swz, NULL);
+ else
+ res = tData32.load(idx, swz, NULL);
+ break;
+ case SM4_FILE_INPUT:
+ case SM4_FILE_INPUT_CONTROL_POINT:
+ case SM4_FILE_INPUT_PATCH_CONSTANT:
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ return interpolate(op, c, s);
+
+ idx = 0;
+ if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
+ idx = info.numInputs - info.numPatchConstants;
+
+ if (op.num_indices == 2) {
+ vtx = getVtxPtr(s);
+ ptr = getSrcPtr(s, 1, 4);
+ idx += op.indices[1].disp;
+ res = getSSA();
+ ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz));
+ ld->setIndirect(0, 0, ptr);
+ ld->setIndirect(0, 1, vtx);
+ } else {
+ idx += op.indices[0].disp;
+ res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4));
+ }
+ if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
+ res->defs->getInsn()->perPatch = 1;
+ break;
+ case SM4_FILE_CONSTANT_BUFFER:
+ assert(op.num_indices == 2);
+ assert(op.is_index_simple(0));
+
+ ptr = getSrcPtr(s, 1, 4);
+ dim = op.indices[0].disp;
+ off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
+
+ res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr);
+ break;
+ case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER:
+ ptr = getSrcPtr(s, 0, 4);
+ off = (op.indices[0].disp * 4 + swz) * 4;
+ res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr);
+ break;
+ case SM4_FILE_INDEXABLE_TEMP:
+ {
+ assert(op.is_index_simple(0));
+ int a = op.indices[0].disp;
+ idx = op.indices[1].disp;
+ res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4));
+ }
+ break;
+ case SM4_FILE_INPUT_PRIMITIVEID:
+ recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
+ res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
+ break;
+ case SM4_FILE_INPUT_GS_INSTANCE_ID:
+ case SM4_FILE_OUTPUT_CONTROL_POINT_ID:
+ recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true);
+ res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0));
+ break;
+ case SM4_FILE_CYCLE_COUNTER:
+ res =
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0));
+ break;
+ case SM4_FILE_INPUT_FORK_INSTANCE_ID:
+ case SM4_FILE_INPUT_JOIN_INSTANCE_ID:
+ {
+ phaseInstanceUsed = true;
+ if (unrollPhase)
+ return loadImm(NULL, phaseInstance);
+ const unsigned int cnt = phaseInstCnt[phase - 1][subPhase];
+ res = getScratch();
+ res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0));
+ res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1));
+ }
+ break;
+ case SM4_FILE_INPUT_DOMAIN_POINT:
+ assert(swz < 3);
+ res = domainPt[swz];
+ break;
+ case SM4_FILE_THREAD_GROUP_SHARED_MEMORY:
+ off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
+ ptr = getSrcPtr(s, 0, 4);
+ res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr);
+ break;
+ case SM4_FILE_RESOURCE:
+ case SM4_FILE_SAMPLER:
+ case SM4_FILE_UNORDERED_ACCESS_VIEW:
+ return NULL;
+ case SM4_FILE_INPUT_THREAD_ID:
+ res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz));
+ break;
+ case SM4_FILE_INPUT_THREAD_GROUP_ID:
+ res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz));
+ break;
+ case SM4_FILE_FUNCTION_INPUT:
+ case SM4_FILE_INPUT_THREAD_ID_IN_GROUP:
+ assert(!"unhandled source file");
+ return NULL;
+ default:
+ assert(!"invalid source file");
+ return NULL;
+ }
+
+ if (op.abs)
+ res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res);
+ if (op.neg)
+ res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res);
+ return res;
+}
+
+Value *
+Converter::dst(const sm4_op &op, int c, int i)
+{
+ switch (op.file) {
+ case SM4_FILE_TEMP:
+ return tData32.acquire(op.indices[0].disp, c);
+ case SM4_FILE_INDEXABLE_TEMP:
+ return getScratch();
+ case SM4_FILE_OUTPUT:
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ return oData.acquire(op.indices[0].disp, c);
+ return getScratch();
+ case SM4_FILE_NULL:
+ return NULL;
+ case SM4_FILE_OUTPUT_DEPTH:
+ case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
+ case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
+ case SM4_FILE_OUTPUT_COVERAGE_MASK:
+ return getScratch();
+ case SM4_FILE_IMMEDIATE32:
+ case SM4_FILE_IMMEDIATE64:
+ case SM4_FILE_CONSTANT_BUFFER:
+ case SM4_FILE_RESOURCE:
+ case SM4_FILE_SAMPLER:
+ case SM4_FILE_UNORDERED_ACCESS_VIEW:
+ assert(!"invalid destination file");
+ return NULL;
+ default:
+ assert(!"invalid file");
+ return NULL;
+ }
+}
+
+void
+Converter::saveFragDepth(operation op, Value *value)
+{
+ if (op == OP_MIN || op == OP_MAX) {
+ Value *zIn;
+ zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2));
+ value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn);
+ }
+ oData.store(info.io.fragDepth, 2, NULL, value);
+}
+
+void
+Converter::saveDst(const sm4_op &op, int c, Value *value, int s)
+{
+ Symbol *sym;
+ Instruction *st;
+ int a, idx;
+
+ switch (op.file) {
+ case SM4_FILE_TEMP:
+ idx = op.indices[0].disp;
+ tData32.store(idx, c, NULL, value);
+ break;
+ case SM4_FILE_INDEXABLE_TEMP:
+ a = op.indices[0].disp;
+ idx = op.indices[1].disp;
+ // FIXME: shift is wrong, depends in lData
+ lData[a].store(idx, c, getDstPtr(s, 1, 4), value);
+ break;
+ case SM4_FILE_OUTPUT:
+ assert(op.num_indices == 1);
+ idx = op.indices[0].disp;
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ oData.store(idx, c, NULL, value);
+ } else {
+ if (phase)
+ idx += info.numOutputs - info.numPatchConstants;
+ const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4;
+ sym = oSym(idx, c);
+ if (sym->reg.file == FILE_SHADER_OUTPUT)
+ st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value);
+ else
+ st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value);
+ st->perPatch = phase ? 1 : 0;
+ }
+ break;
+ case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
+ saveFragDepth(OP_MAX, value);
+ break;
+ case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
+ saveFragDepth(OP_MIN, value);
+ break;
+ case SM4_FILE_OUTPUT_DEPTH:
+ saveFragDepth(OP_NOP, value);
+ break;
+ case SM4_FILE_OUTPUT_COVERAGE_MASK:
+ oData.store(info.io.sampleMask, 0, NULL, value);
+ break;
+ case SM4_FILE_IMMEDIATE32:
+ case SM4_FILE_IMMEDIATE64:
+ case SM4_FILE_INPUT:
+ case SM4_FILE_CONSTANT_BUFFER:
+ case SM4_FILE_RESOURCE:
+ case SM4_FILE_SAMPLER:
+ assert(!"invalid destination file");
+ return;
+ default:
+ assert(!"invalid file");
+ return;
+ }
+}
+
+void
+Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4])
+{
+ Value *res[4] = { NULL, NULL, NULL, NULL };
+ unsigned int c, d;
+
+ for (c = 0; c < 4; ++c)
+ if (dst0[c])
+ tex->tex.mask |= 1 << swz[c];
+ for (d = 0, c = 0; c < 4; ++c)
+ if (tex->tex.mask & (1 << c))
+ tex->setDef(d++, (res[c] = getScratch()));
+
+ bb->insertTail(tex);
+
+ if (insn->opcode == SM4_OPCODE_RESINFO) {
+ if (tex->tex.target.getDim() == 1) {
+ res[2] = loadImm(NULL, 0);
+ if (!tex->tex.target.isArray())
+ res[1] = res[2];
+ } else
+ if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) {
+ res[2] = loadImm(NULL, 0);
+ }
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+ Value *src = res[swz[c]];
+ assert(src);
+ switch (insn->insn.resinfo_return_type) {
+ case 0:
+ mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
+ break;
+ case 1:
+ mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
+ if (swz[c] < tex->tex.target.getDim())
+ mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]);
+ break;
+ default:
+ mkMov(dst0[c], src);
+ break;
+ }
+ }
+ } else {
+ for (c = 0; c < 4; ++c)
+ if (dst0[c])
+ mkMov(dst0[c], res[swz[c]]);
+ }
+}
+
+void
+Converter::handleQUERY(Value *dst0[4], enum TexQuery query)
+{
+ TexInstruction *texi = new_TexInstruction(func, OP_TXQ);
+ texi->tex.query = query;
+
+ assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs
+
+ const int rOp = (query == TXQ_DIMS) ? 2 : 1;
+ const int sOp = (query == TXQ_DIMS) ? 0 : 1;
+
+ const int tR = insn->ops[rOp]->indices[0].disp;
+
+ texi->setTexture(resourceType[tR][0], tR, 0);
+
+ texi->setSrc(0, src(sOp, 0)); // mip level or sample index
+
+ emitTex(dst0, texi, insn->ops[rOp]->swizzle);
+}
+
+void
+Converter::handleLOAD(Value *dst0[4])
+{
+ TexInstruction *texi = new_TexInstruction(func, OP_TXF);
+ unsigned int c;
+
+ const int tR = insn->ops[2]->indices[0].disp;
+
+ texi->setTexture(resourceType[tR][0], tR, 0);
+
+ for (c = 0; c < texi->tex.target.getArgCount(); ++c)
+ texi->setSrc(c, src(0, c));
+
+ if (texi->tex.target == TEX_TARGET_BUFFER) {
+ texi->tex.levelZero = true;
+ } else {
+ texi->setSrc(c++, src(0, 3));
+ for (c = 0; c < 3; ++c) {
+ texi->tex.offset[0][c] = insn->sample_offset[c];
+ if (texi->tex.offset[0][c])
+ texi->tex.useOffsets = 1;
+ }
+ }
+
+ emitTex(dst0, texi, insn->ops[2]->swizzle);
+}
+
+// order of nv50 ir sources: x y z/layer lod/bias dc
+void
+Converter::handleSAMPLE(operation opr, Value *dst0[4])
+{
+ TexInstruction *texi = new_TexInstruction(func, opr);
+ unsigned int c, s;
+ Value *arg[4], *src0[4];
+ Value *val;
+ Value *lod = NULL, *dc = NULL;
+
+ const int tR = insn->ops[2]->indices[0].disp;
+ const int tS = insn->ops[3]->indices[0].disp;
+
+ TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0];
+
+ for (c = 0; c < tgt.getArgCount(); ++c)
+ arg[c] = src0[c] = src(0, c);
+
+ if (insn->opcode == SM4_OPCODE_SAMPLE_L ||
+ insn->opcode == SM4_OPCODE_SAMPLE_B) {
+ lod = src(3, 0);
+ } else
+ if (insn->opcode == SM4_OPCODE_SAMPLE_C ||
+ insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) {
+ dc = src(3, 0);
+ if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ)
+ texi->tex.levelZero = true;
+ } else
+ if (insn->opcode == SM4_OPCODE_SAMPLE_D) {
+ for (c = 0; c < tgt.getDim(); ++c) {
+ texi->dPdx[c] = src(3, c);
+ texi->dPdy[c] = src(4, c);
+ }
+ }
+
+ if (tgt.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
+ val = getScratch();
+ mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]);
+ mkOp2(OP_MAX, TYPE_F32, val, src0[2], val);
+ mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
+ }
+
+ for (s = 0; s < tgt.getArgCount(); ++s)
+ texi->setSrc(s, src0[s]);
+ if (lod)
+ texi->setSrc(s++, lod);
+ if (dc)
+ texi->setSrc(s++, dc);
+
+ for (c = 0; c < 3; ++c) {
+ texi->tex.offset[0][c] = insn->sample_offset[c];
+ if (texi->tex.offset[0][c])
+ texi->tex.useOffsets = 1;
+ }
+
+ texi->setTexture(tgt, tR, tS);
+
+ emitTex(dst0, texi, insn->ops[2]->swizzle);
+}
+
+void
+Converter::handleDP(Value *dst0[4], int dim)
+{
+ Value *src0 = src(0, 0), *src1 = src(1, 0);
+ Value *dotp = getScratch();
+
+ assert(dim > 0);
+
+ mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
+ for (int c = 1; c < dim; ++c)
+ mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp);
+
+ for (int c = 0; c < 4; ++c)
+ dst0[c] = dotp;
+}
+
+void
+Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
+{
+ FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
+ join->fixed = 1;
+ conv->insertHead(join);
+
+ fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
+ fork->insertBefore(fork->getExit(), fork->joinAt);
+}
+
+void
+Converter::finalizeShader()
+{
+ if (finalized)
+ return;
+ BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
+ entryBBs.pop();
+
+ finalized = true;
+
+ bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
+ setPosition(epilogue, true);
+
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ exportOutputs();
+
+ mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+}
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL32(chan) \
+ for ((chan) = 0; (chan) < 4; ++(chan)) \
+ if (insn->ops[0].get()->mask & (1 << (chan)))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL64(chan) \
+ for ((chan) = 0; (chan) < 2; ++(chan)) \
+ if (insn->ops[0].get()->mask & (1 << (chan)))
+
+bool
+Converter::checkDstSrcAliasing() const
+{
+ for (unsigned int d = 0; d < nDstOpnds; ++d) {
+ for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) {
+ if (insn->ops[d]->file != insn->ops[s]->file)
+ continue;
+ int i = insn->ops[s]->num_indices - 1;
+ if (i != insn->ops[d]->num_indices - 1)
+ continue;
+ if (insn->ops[d]->is_index_simple(i) &&
+ insn->ops[s]->is_index_simple(i) &&
+ insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp)
+ return true;
+ }
+ }
+ return false;
+}
+
+bool
+Converter::handleInstruction(unsigned int pos)
+{
+ Value *dst0[4], *rDst0[4];
+ Value *dst1[4], *rDst1[4];
+ int c, nc;
+
+ insn = sm4.insns[pos];
+ enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode);
+
+ operation op = cvtOpcode(opcode);
+
+ sTy = inferSrcType(opcode);
+ dTy = inferDstType(opcode);
+
+ nc = dTy == TYPE_F64 ? 2 : 4;
+
+ nDstOpnds = getDstOpndCount(opcode);
+
+ bool useScratchDst = checkDstSrcAliasing();
+
+ INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst);
+
+ if (nDstOpnds >= 1) {
+ for (c = 0; c < nc; ++c)
+ rDst0[c] = dst0[c] =
+ insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL;
+ if (useScratchDst)
+ for (c = 0; c < nc; ++c)
+ dst0[c] = rDst0[c] ? getScratch() : NULL;
+ }
+
+ if (nDstOpnds >= 2) {
+ for (c = 0; c < nc; ++c)
+ rDst1[c] = dst1[c] =
+ insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL;
+ if (useScratchDst)
+ for (c = 0; c < nc; ++c)
+ dst1[c] = rDst1[c] ? getScratch() : NULL;
+ }
+
+ switch (insn->opcode) {
+ case SM4_OPCODE_ADD:
+ case SM4_OPCODE_AND:
+ case SM4_OPCODE_DIV:
+ case SM4_OPCODE_IADD:
+ case SM4_OPCODE_IMAX:
+ case SM4_OPCODE_IMIN:
+ case SM4_OPCODE_MIN:
+ case SM4_OPCODE_MAX:
+ case SM4_OPCODE_MUL:
+ case SM4_OPCODE_OR:
+ case SM4_OPCODE_UMAX:
+ case SM4_OPCODE_UMIN:
+ case SM4_OPCODE_XOR:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
+ if (dTy == TYPE_F32)
+ insn->ftz = 1;
+ }
+ break;
+
+ case SM4_OPCODE_ISHL:
+ case SM4_OPCODE_ISHR:
+ case SM4_OPCODE_USHR:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
+ insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP;
+ }
+ break;
+
+ case SM4_OPCODE_IMAD:
+ case SM4_OPCODE_MAD:
+ case SM4_OPCODE_UMAD:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c));
+ }
+ break;
+
+ case SM4_OPCODE_DADD:
+ case SM4_OPCODE_DMAX:
+ case SM4_OPCODE_DMIN:
+ case SM4_OPCODE_DMUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL64(c) {
+ mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
+ }
+ break;
+
+ case SM4_OPCODE_UDIV:
+ for (c = 0; c < 4; ++c) {
+ Value *dvn, *dvs;
+ if (dst0[c] || dst1[c]) {
+ dvn = src(0, c);
+ dvs = src(1, c);
+ }
+ if (dst0[c])
+ mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs);
+ if (dst1[c])
+ mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs);
+ }
+ break;
+
+ case SM4_OPCODE_IMUL:
+ case SM4_OPCODE_UMUL:
+ for (c = 0; c < 4; ++c) {
+ Value *a, *b;
+ if (dst0[c] || dst1[c]) {
+ a = src(0, c);
+ b = src(1, c);
+ }
+ if (dst0[c])
+ mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp =
+ NV50_IR_SUBOP_MUL_HIGH;
+ if (dst1[c])
+ mkOp2(OP_MUL, dTy, dst1[c], a, b);
+ }
+ break;
+
+ case SM4_OPCODE_DP2:
+ handleDP(dst0, 2);
+ break;
+ case SM4_OPCODE_DP3:
+ handleDP(dst0, 3);
+ break;
+ case SM4_OPCODE_DP4:
+ handleDP(dst0, 4);
+ break;
+
+ case SM4_OPCODE_DERIV_RTX:
+ case SM4_OPCODE_DERIV_RTX_COARSE:
+ case SM4_OPCODE_DERIV_RTX_FINE:
+ case SM4_OPCODE_DERIV_RTY:
+ case SM4_OPCODE_DERIV_RTY_COARSE:
+ case SM4_OPCODE_DERIV_RTY_FINE:
+ case SM4_OPCODE_MOV:
+ case SM4_OPCODE_INEG:
+ case SM4_OPCODE_NOT:
+ case SM4_OPCODE_SQRT:
+ case SM4_OPCODE_COUNTBITS:
+ case SM4_OPCODE_EXP:
+ case SM4_OPCODE_LOG:
+ case SM4_OPCODE_RCP:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ mkOp1(op, dTy, dst0[c], src(0, c));
+ }
+ break;
+
+ case SM4_OPCODE_FRC:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ Value *val = getScratch();
+ Value *src0 = src(0, c);
+ mkOp1(OP_FLOOR, TYPE_F32, val, src0);
+ mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val);
+ }
+ break;
+
+ case SM4_OPCODE_MOVC:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+ mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c),
+ src(0, c));
+ break;
+
+ case SM4_OPCODE_ROUND_NE:
+ case SM4_OPCODE_ROUND_NI:
+ case SM4_OPCODE_ROUND_PI:
+ case SM4_OPCODE_ROUND_Z:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c));
+ rnd->ftz = 1;
+ rnd->rnd = cvtRoundingMode(opcode);
+ }
+ break;
+
+ case SM4_OPCODE_RSQ:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+ mkOp1(op, dTy, dst0[c], src(0, c));
+ break;
+
+ case SM4_OPCODE_SINCOS:
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c] && !dst1[c])
+ continue;
+ Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c));
+ if (dst0[c])
+ mkOp1(OP_SIN, TYPE_F32, dst0[c], val);
+ if (dst1[c])
+ mkOp1(OP_COS, TYPE_F32, dst1[c], val);
+ }
+ break;
+
+ case SM4_OPCODE_EQ:
+ case SM4_OPCODE_GE:
+ case SM4_OPCODE_IEQ:
+ case SM4_OPCODE_IGE:
+ case SM4_OPCODE_ILT:
+ case SM4_OPCODE_LT:
+ case SM4_OPCODE_NE:
+ case SM4_OPCODE_INE:
+ case SM4_OPCODE_ULT:
+ case SM4_OPCODE_UGE:
+ case SM4_OPCODE_DEQ:
+ case SM4_OPCODE_DGE:
+ case SM4_OPCODE_DLT:
+ case SM4_OPCODE_DNE:
+ {
+ CondCode cc = cvtCondCode(opcode);
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+ CmpInstruction *set;
+ set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL);
+ set->setType(dTy, sTy);
+ if (sTy == TYPE_F32)
+ set->ftz = 1;
+ }
+ }
+ break;
+
+ case SM4_OPCODE_FTOI:
+ case SM4_OPCODE_FTOU:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+ mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z;
+ break;
+ case SM4_OPCODE_ITOF:
+ case SM4_OPCODE_UTOF:
+ case SM4_OPCODE_F32TOF16:
+ case SM4_OPCODE_F16TOF32:
+ case SM4_OPCODE_DTOF:
+ case SM4_OPCODE_FTOD:
+ FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+ mkCvt(op, dTy, dst0[c], sTy, src(0, c));
+ break;
+
+ case SM4_OPCODE_CUT:
+ case SM4_OPCODE_CUT_STREAM:
+ mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1;
+ break;
+ case SM4_OPCODE_EMIT:
+ case SM4_OPCODE_EMIT_STREAM:
+ mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1;
+ break;
+ case SM4_OPCODE_EMITTHENCUT:
+ case SM4_OPCODE_EMITTHENCUT_STREAM:
+ {
+ Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0));
+ cut->fixed = 1;
+ cut->subOp = NV50_IR_SUBOP_EMIT_RESTART;
+ }
+ break;
+
+ case SM4_OPCODE_DISCARD:
+ info.prop.fp.usesDiscard = TRUE;
+ mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(
+ insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
+ break;
+
+ case SM4_OPCODE_CALL:
+ case SM4_OPCODE_CALLC:
+ assert(!"CALL/CALLC not implemented");
+ break;
+
+ case SM4_OPCODE_RET:
+ // XXX: the following doesn't work with subroutines / early ret
+ if (!haveNextPhase(pos))
+ finalizeShader();
+ else
+ phaseEnded = phase + 1;
+ break;
+
+ case SM4_OPCODE_IF:
+ {
+ BasicBlock *ifClause = new BasicBlock(func);
+
+ bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE);
+ condBBs.push(bb);
+ joinBBs.push(bb);
+
+ mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0));
+
+ setPosition(ifClause, true);
+ }
+ break;
+ case SM4_OPCODE_ELSE:
+ {
+ BasicBlock *elseClause = new BasicBlock(func);
+ BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
+
+ forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE);
+ condBBs.push(bb);
+
+ forkPoint->getExit()->asFlow()->target.bb = elseClause;
+ if (!bb->isTerminated())
+ mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
+
+ setPosition(elseClause, true);
+ }
+ break;
+ case SM4_OPCODE_ENDIF:
+ {
+ BasicBlock *convPoint = new BasicBlock(func);
+ BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
+ BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
+
+ if (!bb->isTerminated()) {
+ // we only want join if none of the clauses ended with CONT/BREAK/RET
+ if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
+ insertConvergenceOps(convPoint, forkPoint);
+ mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL);
+ bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
+ }
+
+ if (lastBB->getExit()->op == OP_BRA) {
+ lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
+ lastBB->getExit()->asFlow()->target.bb = convPoint;
+ }
+ setPosition(convPoint, true);
+ }
+ break;
+
+ case SM4_OPCODE_SWITCH:
+ case SM4_OPCODE_CASE:
+ case SM4_OPCODE_ENDSWITCH:
+ assert(!"SWITCH/CASE/ENDSWITCH not implemented");
+ break;
+
+ case SM4_OPCODE_LOOP:
+ {
+ BasicBlock *loopHeader = new BasicBlock(func);
+ BasicBlock *loopBreak = new BasicBlock(func);
+
+ loopBBs.push(loopHeader);
+ breakBBs.push(loopBreak);
+ if (loopBBs.getSize() > func->loopNestingBound)
+ func->loopNestingBound++;
+
+ mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL);
+
+ bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE);
+ setPosition(loopHeader, true);
+ mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL);
+ }
+ break;
+ case SM4_OPCODE_ENDLOOP:
+ {
+ BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
+
+ if (!bb->isTerminated()) {
+ mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
+ bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
+ }
+ setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
+ }
+ break;
+ case SM4_OPCODE_BREAK:
+ {
+ if (bb->isTerminated())
+ break;
+ BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
+ mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL);
+ bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
+ }
+ break;
+ case SM4_OPCODE_BREAKC:
+ {
+ BasicBlock *nextBB = new BasicBlock(func);
+ BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
+ CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P;
+ mkFlow(OP_BREAK, breakBB, cc, src(0, 0));
+ bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
+ bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
+ setPosition(nextBB, true);
+ }
+ break;
+ case SM4_OPCODE_CONTINUE:
+ {
+ if (bb->isTerminated())
+ break;
+ BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
+ mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
+ contBB->explicitCont = true;
+ bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
+ }
+ break;
+ case SM4_OPCODE_CONTINUEC:
+ {
+ BasicBlock *nextBB = new BasicBlock(func);
+ BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
+ mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
+ bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
+ bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
+ setPosition(nextBB, true);
+ }
+ break;
+
+ case SM4_OPCODE_SAMPLE:
+ case SM4_OPCODE_SAMPLE_C:
+ case SM4_OPCODE_SAMPLE_C_LZ:
+ case SM4_OPCODE_SAMPLE_L:
+ case SM4_OPCODE_SAMPLE_D:
+ case SM4_OPCODE_SAMPLE_B:
+ handleSAMPLE(op, dst0);
+ break;
+ case SM4_OPCODE_LD:
+ case SM4_OPCODE_LD_MS:
+ handleLOAD(dst0);
+ break;
+
+ case SM4_OPCODE_GATHER4:
+ assert(!"GATHER4 not implemented\n");
+ break;
+
+ case SM4_OPCODE_RESINFO:
+ handleQUERY(dst0, TXQ_DIMS);
+ break;
+ case SM4_OPCODE_SAMPLE_POS:
+ handleQUERY(dst0, TXQ_SAMPLE_POSITION);
+ break;
+
+ case SM4_OPCODE_NOP:
+ mkOp(OP_NOP, TYPE_NONE, NULL);
+ break;
+
+ case SM4_OPCODE_HS_DECLS:
+ // XXX: any significance ?
+ break;
+ case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
+ phase = 0;
+ break;
+ case SM4_OPCODE_HS_FORK_PHASE:
+ if (phase != 1)
+ subPhase = 0;
+ phase = 1;
+ phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
+ phaseStart = pos;
+ if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase])
+ unrollPhase = true;
+ break;
+ case SM4_OPCODE_HS_JOIN_PHASE:
+ if (phase != 2)
+ subPhase = 0;
+ phase = 2;
+ phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
+ phaseStart = pos;
+ if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase])
+ unrollPhase = true;
+ break;
+
+ default:
+ ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode);
+ abort();
+ return false;
+ }
+
+ for (c = 0; c < nc; ++c) {
+ if (nDstOpnds >= 1 && rDst0[c]) {
+ if (dst0[c] != rDst0[c])
+ mkMov(rDst0[c], dst0[c]);
+ saveDst(0, c, rDst0[c]);
+ }
+ if (nDstOpnds >= 2 && rDst1[c]) {
+ if (dst1[c] != rDst1[c])
+ mkMov(rDst1[c], dst1[c]);
+ saveDst(1, c, rDst1[c]);
+ }
+ }
+
+ memset(srcPtr, 0, sizeof(srcPtr));
+ memset(dstPtr, 0, sizeof(dstPtr));
+ memset(vtxBase, 0, sizeof(vtxBase));
+ return true;
+}
+
+void
+Converter::exportOutputs()
+{
+ for (int i = 0; i < info.numOutputs; ++i) {
+ for (int c = 0; c < 4; ++c) {
+ if (!oData.exists(i, c))
+ continue;
+ Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
+ info.out[i].slot[c] * 4);
+ Value *val = oData.load(i, c, NULL);
+ if (val)
+ mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
+ }
+ }
+}
+
+Converter::Converter(Program *p, struct nv50_ir_prog_info *s)
+ : tData32(this),
+ tData64(this),
+ oData(this),
+ info(*s),
+ sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)),
+ prog(p)
+{
+ memset(srcPtr, 0, sizeof(srcPtr));
+ memset(dstPtr, 0, sizeof(dstPtr));
+ memset(vtxBase, 0, sizeof(vtxBase));
+
+ memset(interpMode, 0, sizeof(interpMode));
+
+ nrRegVals = nrArrays = arrayVol = 0;
+
+ for (phase = 3; phase > 0; --phase)
+ for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+ out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT;
+
+ unrollPhase = false;
+ phaseStart = 0;
+ subPhaseCnt[0] = subPhaseCnt[1] = 0;
+}
+
+Converter::~Converter()
+{
+ if (lData)
+ delete[] lData;
+
+ if (subPhaseCnt[0])
+ delete[] phaseInstCnt[0];
+ if (subPhaseCnt[1])
+ delete[] phaseInstCnt[1];
+}
+
+bool
+Converter::haveNextPhase(unsigned int pos) const
+{
+ ++pos;
+ return (pos < sm4.insns.size()) &&
+ (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE ||
+ sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE);
+}
+
+bool
+Converter::run()
+{
+ parseSignature();
+
+ for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
+ inspectDeclaration(*sm4.dcls[pos]);
+
+ phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]];
+ phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]];
+ for (int i = 0; i < subPhaseCnt[0]; ++i)
+ phaseInstCnt[0][i] = -1;
+ for (int i = 0; i < subPhaseCnt[1]; ++i)
+ phaseInstCnt[1][i] = -1;
+ // re-increased in handleDeclaration:
+ subPhaseCnt[0] = subPhaseCnt[1] = 0;
+
+ allocateValues();
+ nrArrays = 0;
+ for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
+ handleDeclaration(*sm4.dcls[pos]);
+
+ info.assignSlots(&info);
+
+ if (sm4.dcls.size() == 0 && sm4.insns.size() == 0)
+ return true;
+
+ BasicBlock *entry = new BasicBlock(prog->main);
+ BasicBlock *leave = new BasicBlock(prog->main);
+
+ prog->main->setEntry(entry);
+ prog->main->setExit(leave);
+
+ setPosition(entry, true);
+
+ entryBBs.push(entry);
+ leaveBBs.push(leave);
+
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ Symbol *sv = mkSysVal(SV_POSITION, 3);
+ fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+ mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+ } else
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) {
+ const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2;
+ int c;
+ for (c = 0; c < n; ++c)
+ domainPt[c] =
+ mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
+ if (c == 2)
+ domainPt[2] = loadImm(NULL, 0.0f);
+ }
+
+ finalized = false;
+ phaseEnded = 0;
+ phase = 0;
+ subPhase = 0;
+ for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
+ handleInstruction(pos);
+ if (likely(phase == 0) || (phaseEnded < 2))
+ continue;
+ phaseEnded = 0;
+ if (!unrollPhase || !phaseInstanceUsed) {
+ ++subPhase;
+ continue;
+ }
+ phaseInstanceUsed = false;
+ if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
+ pos = phaseStart - 1;
+ else
+ ++subPhase;
+ }
+ finalizeShader();
+
+ return true;
+}
+
+} // anonymous namespace
+
+namespace nv50_ir {
+
+bool
+Program::makeFromSM4(struct nv50_ir_prog_info *info)
+{
+ Converter bld(this, info);
+ return bld.run();
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h
new file mode 100644
index 00000000000..3c7b55aa1d4
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h
@@ -0,0 +1,183 @@
+
+#ifndef __NV50_IR_FROM_SM4_H__
+#define __NV50_IR_FROM_SM4_H__
+
+typedef enum D3D_PRIMITIVE_TOPOLOGY {
+ D3D_PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
+ D3D_PRIMITIVE_TOPOLOGY_POINTLIST = 1,
+ D3D_PRIMITIVE_TOPOLOGY_LINELIST = 2,
+ D3D_PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
+ D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
+ D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
+ D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
+ D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
+ D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
+ D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
+ D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33,
+ D3D_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST = 34,
+ D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST = 35,
+ D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST = 36,
+ D3D_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST = 37,
+ D3D_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST = 38,
+ D3D_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST = 39,
+ D3D_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST = 40,
+ D3D_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST = 41,
+ D3D_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST = 42,
+ D3D_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST = 43,
+ D3D_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST = 44,
+ D3D_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST = 45,
+ D3D_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST = 46,
+ D3D_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST = 47,
+ D3D_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST = 48,
+ D3D_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST = 49,
+ D3D_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST = 50,
+ D3D_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST = 51,
+ D3D_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST = 52,
+ D3D_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST = 53,
+ D3D_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST = 54,
+ D3D_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST = 55,
+ D3D_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST = 56,
+ D3D_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST = 57,
+ D3D_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST = 58,
+ D3D_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST = 59,
+ D3D_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST = 60,
+ D3D_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST = 61,
+ D3D_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST = 62,
+ D3D_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST = 63,
+ D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST = 64,
+} D3D_PRIMITIVE_TOPOLOGY;
+
+typedef enum D3D_RESOURCE_RETURN_TYPE {
+ D3D_RETURN_TYPE_UNORM = 1,
+ D3D_RETURN_TYPE_SNORM = 2,
+ D3D_RETURN_TYPE_SINT = 3,
+ D3D_RETURN_TYPE_UINT = 4,
+ D3D_RETURN_TYPE_FLOAT = 5,
+ D3D_RETURN_TYPE_MIXED = 6,
+ D3D_RETURN_TYPE_DOUBLE = 7,
+ D3D_RETURN_TYPE_CONTINUED = 8,
+ D3D10_RETURN_TYPE_UNORM = 1,
+ D3D10_RETURN_TYPE_SNORM = 2,
+ D3D10_RETURN_TYPE_SINT = 3,
+ D3D10_RETURN_TYPE_UINT = 4,
+ D3D10_RETURN_TYPE_FLOAT = 5,
+ D3D10_RETURN_TYPE_MIXED = 6,
+ D3D11_RETURN_TYPE_UNORM = 1,
+ D3D11_RETURN_TYPE_SNORM = 2,
+ D3D11_RETURN_TYPE_SINT = 3,
+ D3D11_RETURN_TYPE_UINT = 4,
+ D3D11_RETURN_TYPE_FLOAT = 5,
+ D3D11_RETURN_TYPE_MIXED = 6,
+ D3D11_RETURN_TYPE_DOUBLE = 7,
+ D3D11_RETURN_TYPE_CONTINUED = 8
+} D3D_RESOURCE_RETURN_TYPE;
+
+typedef enum D3D_REGISTER_COMPONENT_TYPE {
+ D3D_REGISTER_COMPONENT_UNKNOWN = 0,
+ D3D_REGISTER_COMPONENT_UINT32 = 1,
+ D3D_REGISTER_COMPONENT_SINT32 = 2,
+ D3D_REGISTER_COMPONENT_FLOAT32 = 3,
+ D3D10_REGISTER_COMPONENT_UNKNOWN = 0,
+ D3D10_REGISTER_COMPONENT_UINT32 = 1,
+ D3D10_REGISTER_COMPONENT_SINT32 = 2,
+ D3D10_REGISTER_COMPONENT_FLOAT32 = 3
+} D3D_REGISTER_COMPONENT_TYPE;
+
+typedef enum D3D_TESSELLATOR_DOMAIN {
+ D3D_TESSELLATOR_DOMAIN_UNDEFINED = 0,
+ D3D_TESSELLATOR_DOMAIN_ISOLINE = 1,
+ D3D_TESSELLATOR_DOMAIN_TRI = 2,
+ D3D_TESSELLATOR_DOMAIN_QUAD = 3,
+ D3D11_TESSELLATOR_DOMAIN_UNDEFINED = 0,
+ D3D11_TESSELLATOR_DOMAIN_ISOLINE = 1,
+ D3D11_TESSELLATOR_DOMAIN_TRI = 2,
+ D3D11_TESSELLATOR_DOMAIN_QUAD = 3
+} D3D_TESSELLATOR_DOMAIN;
+
+typedef enum D3D_TESSELLATOR_PARTITIONING {
+ D3D_TESSELLATOR_PARTITIONING_UNDEFINED = 0,
+ D3D_TESSELLATOR_PARTITIONING_INTEGER = 1,
+ D3D_TESSELLATOR_PARTITIONING_POW2 = 2,
+ D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+ D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4,
+ D3D11_TESSELLATOR_PARTITIONING_UNDEFINED = 0,
+ D3D11_TESSELLATOR_PARTITIONING_INTEGER = 1,
+ D3D11_TESSELLATOR_PARTITIONING_POW2 = 2,
+ D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+ D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
+} D3D_TESSELLATOR_PARTITIONING;
+
+typedef enum D3D_TESSELLATOR_OUTPUT_PRIMITIVE {
+ D3D_TESSELLATOR_OUTPUT_UNDEFINED = 0,
+ D3D_TESSELLATOR_OUTPUT_POINT = 1,
+ D3D_TESSELLATOR_OUTPUT_LINE = 2,
+ D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+ D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4,
+ D3D11_TESSELLATOR_OUTPUT_UNDEFINED = 0,
+ D3D11_TESSELLATOR_OUTPUT_POINT = 1,
+ D3D11_TESSELLATOR_OUTPUT_LINE = 2,
+ D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+ D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
+} D3D_TESSELLATOR_OUTPUT_PRIMITIVE;
+
+typedef enum D3D_NAME {
+ D3D_NAME_UNDEFINED = 0,
+ D3D_NAME_POSITION = 1,
+ D3D_NAME_CLIP_DISTANCE = 2,
+ D3D_NAME_CULL_DISTANCE = 3,
+ D3D_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+ D3D_NAME_VIEWPORT_ARRAY_INDEX = 5,
+ D3D_NAME_VERTEX_ID = 6,
+ D3D_NAME_PRIMITIVE_ID = 7,
+ D3D_NAME_INSTANCE_ID = 8,
+ D3D_NAME_IS_FRONT_FACE = 9,
+ D3D_NAME_SAMPLE_INDEX = 10,
+ D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11,
+ D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12,
+ D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13,
+ D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14,
+ D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15,
+ D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16,
+ D3D_NAME_TARGET = 64,
+ D3D_NAME_DEPTH = 65,
+ D3D_NAME_COVERAGE = 66,
+ D3D_NAME_DEPTH_GREATER_EQUAL = 67,
+ D3D_NAME_DEPTH_LESS_EQUAL = 68,
+ D3D10_NAME_UNDEFINED = 0,
+ D3D10_NAME_POSITION = 1,
+ D3D10_NAME_CLIP_DISTANCE = 2,
+ D3D10_NAME_CULL_DISTANCE = 3,
+ D3D10_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+ D3D10_NAME_VIEWPORT_ARRAY_INDEX = 5,
+ D3D10_NAME_VERTEX_ID = 6,
+ D3D10_NAME_PRIMITIVE_ID = 7,
+ D3D10_NAME_INSTANCE_ID = 8,
+ D3D10_NAME_IS_FRONT_FACE = 9,
+ D3D10_NAME_SAMPLE_INDEX = 10,
+ D3D11_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11,
+ D3D11_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12,
+ D3D11_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13,
+ D3D11_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14,
+ D3D11_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15,
+ D3D11_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16,
+ D3D10_NAME_TARGET = 64,
+ D3D10_NAME_DEPTH = 65,
+ D3D10_NAME_COVERAGE = 66,
+ D3D11_NAME_DEPTH_GREATER_EQUAL = 67,
+ D3D11_NAME_DEPTH_LESS_EQUAL = 68
+} D3D_NAME;
+
+typedef struct _D3D11_SIGNATURE_PARAMETER_DESC {
+ const char* SemanticName;
+ unsigned int SemanticIndex;
+ unsigned int Register;
+ D3D_NAME SystemValueType;
+ D3D_REGISTER_COMPONENT_TYPE ComponentType;
+ unsigned char Mask;
+ unsigned char ReadWriteMask;
+ unsigned int Stream;
+} D3D11_SIGNATURE_PARAMETER_DESC;
+
+#include "../../../state_trackers/d3d1x/d3d1xshader/include/sm4.h"
+
+#endif // __NV50_IR_FROM_SM4_H__