diff options
Diffstat (limited to 'src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp')
-rw-r--r-- | src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp | 869 |
1 files changed, 869 insertions, 0 deletions
diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp new file mode 100644 index 00000000000..615ce8c2550 --- /dev/null +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp @@ -0,0 +1,869 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "d3d1xstutil.h" +#include "sm4.h" +#include "tgsi/tgsi_ureg.h" +#include <vector> + +#if 1 +#define check(x) assert(x) +#define fail(x) assert(0 && (x)) +#else +#define check(x) do {if(!(x)) throw(#x);} while(0) +#define fail(x) throw(x) +#endif + +struct tgsi_interpolation +{ + unsigned interpolation; + bool centroid; +}; + +static tgsi_interpolation sm4_to_pipe_interpolation[] = +{ + {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */ + {TGSI_INTERPOLATE_CONSTANT, false}, + {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */ + {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */ + {TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */ + {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */ + + // Added in D3D10.1 + {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */ + {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */ +}; + +static int sm4_to_pipe_sv[] = +{ + -1, + TGSI_SEMANTIC_POSITION, + -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */ + -1, /*TGSI_SEMANTIC_CULL_DISTANCE */ + -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */ + -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */ + -1, /*TGSI_SEMANTIC_VERTEXID,*/ + TGSI_SEMANTIC_PRIMID, + TGSI_SEMANTIC_INSTANCEID, + TGSI_SEMANTIC_FACE, + -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/ +}; + +struct sm4_to_tgsi_converter +{ + struct ureg_program* ureg; + std::vector<struct ureg_dst> temps; + std::vector<struct ureg_dst> outputs; + std::vector<struct ureg_src> inputs; + std::vector<struct ureg_src> samplers; + std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison + std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison + std::vector<std::pair<unsigned, unsigned> > loops; + sm4_insn* insn; + struct sm4_program& program; + std::vector<unsigned> sm4_to_tgsi_insn_num; + std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num; + bool in_sub; + bool avoid_txf; + bool avoid_int; + + sm4_to_tgsi_converter(struct sm4_program& program) + : program(program) + { + avoid_txf = true; + avoid_int = false; + } + + struct ureg_dst _reg(sm4_op& op) + { + switch(op.file) + { + case SM4_FILE_NULL: + { + struct ureg_dst d; + memset(&d, 0, sizeof(d)); + d.File = TGSI_FILE_NULL; + return d; + } + case SM4_FILE_TEMP: + check(op.has_simple_index()); + check(op.indices[0].disp < temps.size()); + return temps[op.indices[0].disp]; + case SM4_FILE_OUTPUT: + check(op.has_simple_index()); + check(op.indices[0].disp < outputs.size()); + return outputs[op.indices[0].disp]; + default: + check(0); + return ureg_dst_undef(); + } + } + + struct ureg_dst _dst(unsigned i = 0) + { + check(i < insn->num_ops); + sm4_op& op = *insn->ops[i]; + check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR); + struct ureg_dst d = ureg_writemask(_reg(op), op.mask); + if(insn->insn.sat) + d = ureg_saturate(d); + return d; + } + + struct ureg_src _src(unsigned i) + { + check(i < insn->num_ops); + sm4_op& op = *insn->ops[i]; + struct ureg_src s; + switch(op.file) + { + case SM4_FILE_IMMEDIATE32: + s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32); + break; + case SM4_FILE_INPUT: + check(op.is_index_simple(0)); + check(op.num_indices == 1 || op.num_indices == 2); + // TODO: is this correct, or are incorrectly swapping the two indices in the GS case? + check(op.indices[op.num_indices - 1].disp < inputs.size()); + s = inputs[op.indices[op.num_indices - 1].disp]; + if(op.num_indices == 2) + { + s.Dimension = 1; + s.DimensionIndex = op.indices[0].disp; + } + break; + case SM4_FILE_CONSTANT_BUFFER: + // TODO: indirect addressing + check(op.num_indices == 2); + check(op.is_index_simple(0)); + check(op.is_index_simple(1)); + s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp); + s.Dimension = 1; + s.DimensionIndex = op.indices[0].disp; + break; + default: + s = ureg_src(_reg(op)); + break; + } + if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR) + s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]); + else + { + /* immediates are masked to show needed values */ + check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64); + } + if(op.abs) + s = ureg_abs(s); + if(op.neg) + s = ureg_negate(s); + return s; + }; + + int _idx(sm4_file file, unsigned i = 0) + { + check(i < insn->num_ops); + sm4_op& op = *insn->ops[i]; + check(op.file == file); + check(op.has_simple_index()); + return (int)op.indices[0].disp; + } + + int _texslot(bool have_sampler = true) + { + std::map<std::pair<int, int>, int>::iterator i; + i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1)); + check(i != program.resource_sampler_to_slot.end()); + return i->second; + } + + unsigned tex_target(unsigned texslot) + { + unsigned mode = sampler_modes[program.slot_to_sampler[texslot]]; + unsigned target; + if(mode) + target = targets[program.slot_to_resource[texslot]].second; + else + target = targets[program.slot_to_resource[texslot]].first; + check(target); + return target; + } + + std::vector<struct ureg_dst> insn_tmps; + + struct ureg_dst _tmp() + { + struct ureg_dst t = ureg_DECL_temporary(ureg); + insn_tmps.push_back(t); + return t; + } + + struct ureg_dst _tmp(struct ureg_dst d) + { + if(d.File == TGSI_FILE_TEMPORARY) + return d; + else + return ureg_writemask(_tmp(), d.WriteMask); + } + +#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break +#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break +#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break +#define OP1(n) OP1_(n, n) +#define OP2(n) OP2_(n, n) +#define OP3(n) OP3_(n, n) +#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break; + + void translate_insns(unsigned begin, unsigned end) + { + for(unsigned insn_num = begin; insn_num < end; ++insn_num) + { + sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg); + unsigned label; + insn = program.insns[insn_num]; + bool ok; + ok = true; + switch(insn->opcode) + { + // trivial instructions + case SM4_OPCODE_NOP: + break; + OP1(MOV); + + // float + OP2(ADD); + OP2(MUL); + OP3(MAD); + OP2(DIV); + OP1(FRC); + OP1(RCP); + OP2(MIN); + OP2(MAX); + OP2_(LT, SLT); + OP2_(GE, SGE); + OP2_(EQ, SEQ); + OP2_(NE, SNE); + + // bitwise + OP1(NOT); + OP2(AND); + OP2(OR); + OP2(XOR); + + // special mathematical + OP2(DP2); + OP2(DP3); + OP2(DP4); + OP1(RSQ); + OP1_(LOG, LG2); + OP1_(EXP, EX2); + + // rounding + OP1_(ROUND_NE, ROUND); + OP1_(ROUND_Z, TRUNC); + OP1_(ROUND_PI, CEIL); + OP1_(ROUND_NI, FLR); + + // cross-thread + OP1_(DERIV_RTX, DDX); + OP1_(DERIV_RTX_COARSE, DDX); + OP1_(DERIV_RTX_FINE, DDX); + OP1_(DERIV_RTY, DDY); + OP1_(DERIV_RTY_COARSE, DDY); + OP1_(DERIV_RTY_FINE, DDY); + case SM4_OPCODE_EMIT: + ureg_EMIT(ureg); + break; + case SM4_OPCODE_CUT: + ureg_ENDPRIM(ureg); + break; + case SM4_OPCODE_EMITTHENCUT: + ureg_EMIT(ureg); + ureg_ENDPRIM(ureg); + break; + + // non-trivial instructions + case SM4_OPCODE_MOVC: + /* CMP checks for < 0, but MOVC checks for != 0 + * but fortunately, x != 0 is equivalent to -abs(x) < 0 + * XXX: can test_nz apply to this?! + */ + ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3)); + break; + case SM4_OPCODE_SQRT: + { + struct ureg_dst d = _dst(); + struct ureg_dst t = _tmp(d); + ureg_RSQ(ureg, t, _src(1)); + ureg_RCP(ureg, d, ureg_src(t)); + break; + } + case SM4_OPCODE_SINCOS: + { + struct ureg_dst s = _dst(0); + struct ureg_dst c = _dst(1); + struct ureg_src v = _src(2); + if(s.File != TGSI_FILE_NULL) + ureg_SIN(ureg, s, v); + if(c.File != TGSI_FILE_NULL) + ureg_COS(ureg, c, v); + break; + } + + // control flow + case SM4_OPCODE_DISCARD: + ureg_KIL(ureg, _src(0)); + break; + OP_CF(LOOP, BGNLOOP); + OP_CF(ENDLOOP, ENDLOOP); + case SM4_OPCODE_BREAK: + ureg_BRK(ureg); + break; + case SM4_OPCODE_BREAKC: + // XXX: can test_nz apply to this?! + ureg_BREAKC(ureg, _src(0)); + break; + case SM4_OPCODE_CONTINUE: + ureg_CONT(ureg); + break; + case SM4_OPCODE_CONTINUEC: + // XXX: can test_nz apply to this?! + ureg_IF(ureg, _src(0), &label); + ureg_CONT(ureg); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + ureg_ENDIF(ureg); + break; + case SM4_OPCODE_SWITCH: + ureg_SWITCH(ureg, _src(0)); + break; + case SM4_OPCODE_CASE: + ureg_CASE(ureg, _src(0)); + break; + case SM4_OPCODE_DEFAULT: + ureg_DEFAULT(ureg); + break; + case SM4_OPCODE_ENDSWITCH: + ureg_ENDSWITCH(ureg); + break; + case SM4_OPCODE_CALL: + ureg_CAL(ureg, &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)])); + break; + case SM4_OPCODE_LABEL: + if(in_sub) + ureg_ENDSUB(ureg); + else + ureg_END(ureg); + ureg_BGNSUB(ureg); + in_sub = true; + break; + case SM4_OPCODE_RET: + if(in_sub || insn_num != (program.insns.size() - 1)) + ureg_RET(ureg); + break; + case SM4_OPCODE_RETC: + ureg_IF(ureg, _src(0), &label); + if(insn->insn.test_nz) + ureg_RET(ureg); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + if(!insn->insn.test_nz) + { + ureg_ELSE(ureg, &label); + ureg_RET(ureg); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + } + ureg_ENDIF(ureg); + break; + OP_CF(ELSE, ELSE); + case SM4_OPCODE_ENDIF: + ureg_ENDIF(ureg); + break; + case SM4_OPCODE_IF: + if(insn->insn.test_nz) + { + ureg_IF(ureg, _src(0), &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); + } + else + { + unsigned linked = program.cf_insn_linked[insn_num]; + if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF) + { + ureg_IF(ureg, _src(0), &label); + ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); + ureg_ELSE(ureg, &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); + } + else + { + /* we have to swap the branches in this case (fun!) + * TODO: maybe just emit a SEQ 0? + * */ + unsigned endif = program.cf_insn_linked[linked]; + + ureg_IF(ureg, _src(0), &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); + + translate_insns(linked + 1, endif); + + sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg); + ureg_ELSE(ureg, &label); + label_to_sm4_insn_num.push_back(std::make_pair(label, endif)); + + translate_insns(insn_num + 1, linked); + + insn_num = endif - 1; + goto next; + } + } + break; + case SM4_OPCODE_RESINFO: + { + std::map<int, int>::iterator i; + i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2)); + check(i != program.resource_to_slot.end()); + unsigned texslot = i->second; + + // no driver actually provides this, unfortunately + ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); + break; + }; + // TODO: sample offset, sample index + case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch) + case SM4_OPCODE_LD_MS: + { + unsigned texslot = _texslot(false); + unsigned dim = 0; + switch(targets[texslot].first) + { + case TGSI_TEXTURE_1D: + dim = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + dim = 2; + break; + case TGSI_TEXTURE_3D: + dim = 3; + break; + default: + check(0); + } + struct ureg_dst tmp = _tmp(); + if(avoid_txf) + { + struct ureg_src texcoord; + if(!avoid_int) + { + ureg_I2F(ureg, tmp, _src(1)); + texcoord = ureg_src(tmp); + } + else + texcoord = _src(1); + + ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]); + } + else + ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE: // dst, coord, res, samp + { + unsigned texslot = _texslot(); + ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0)); + ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy + { + unsigned texslot = _texslot(); + ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5)); + break; + } + case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x + { + unsigned texslot = _texslot(); + struct ureg_dst tmp = _tmp(); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); + ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); + ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + break; + } + default: + ok = false; + break; + } + + if(!ok && !avoid_int) + { + ok = true; + switch(insn->opcode) + { + // integer + OP1_(ITOF, I2F); + OP1_(FTOI, F2I); + OP2_(IADD, UADD); + OP1(INEG); + OP2_(IMUL, UMUL); + OP3_(IMAD, UMAD); + OP2_(ISHL, SHL); + OP2_(ISHR, ISHR); + OP2(IMIN); + OP2(IMAX); + OP2_(ILT, ISLT); + OP2_(IGE, ISGE); + OP2_(IEQ, USEQ); + OP2_(INE, USNE); + + // unsigned + OP1_(UTOF, U2F); + OP1_(FTOU, F2U); + OP2(UMUL); + OP3(UMAD); + OP2(UMIN); + OP2(UMAX); + OP2_(ULT, USLT); + OP2_(UGE, USGE); + OP2(USHR); + + case SM4_OPCODE_UDIV: + { + struct ureg_dst q = _dst(0); + struct ureg_dst r = _dst(1); + struct ureg_src a = _src(2); + struct ureg_src b = _src(3); + if(q.File != TGSI_FILE_NULL) + ureg_UDIV(ureg, q, a, b); + if(r.File != TGSI_FILE_NULL) + ureg_UMOD(ureg, r, a, b); + break; + } + default: + ok = false; + } + } + + if(!ok && avoid_int) + { + ok = true; + switch(insn->opcode) + { + case SM4_OPCODE_ITOF: + case SM4_OPCODE_UTOF: + break; + OP1_(FTOI, TRUNC); + OP1_(FTOU, FLR); + // integer + OP2_(IADD, ADD); + OP2_(IMUL, MUL); + OP3_(IMAD, MAD); + OP2_(MIN, MIN); + OP2_(MAX, MAX); + OP2_(ILT, SLT); + OP2_(IGE, SGE); + OP2_(IEQ, SEQ); + OP2_(INE, SNE); + + // unsigned + OP2_(UMUL, MUL); + OP3_(UMAD, MAD); + OP2_(UMIN, MIN); + OP2_(UMAX, MAX); + OP2_(ULT, SLT); + OP2_(UGE, SGE); + + case SM4_OPCODE_INEG: + ureg_MOV(ureg, _dst(), ureg_negate(_src(1))); + break; + case SM4_OPCODE_ISHL: + { + struct ureg_dst d = _dst(); + struct ureg_dst t = _tmp(d); + ureg_EX2(ureg, t, _src(2)); + ureg_MUL(ureg, d, ureg_src(t), _src(1)); + break; + } + case SM4_OPCODE_ISHR: + case SM4_OPCODE_USHR: + { + struct ureg_dst d = _dst(); + struct ureg_dst t = _tmp(d); + ureg_EX2(ureg, t, ureg_negate(_src(2))); + ureg_MUL(ureg, t, ureg_src(t), _src(1)); + ureg_FLR(ureg, d, ureg_src(t)); + break; + } + case SM4_OPCODE_UDIV: + { + struct ureg_dst q = _dst(0); + struct ureg_dst r = _dst(1); + struct ureg_src a = _src(2); + struct ureg_src b = _src(3); + struct ureg_dst f = _tmp(); + ureg_DIV(ureg, f, a, b); + if(q.File != TGSI_FILE_NULL) + ureg_FLR(ureg, q, ureg_src(f)); + if(r.File != TGSI_FILE_NULL) + { + ureg_FRC(ureg, f, ureg_src(f)); + ureg_MUL(ureg, r, ureg_src(f), b); + } + break; + } + default: + ok = false; + } + } + + check(ok); + + if(!insn_tmps.empty()) + { + for(unsigned i = 0; i < insn_tmps.size(); ++i) + ureg_release_temporary(ureg, insn_tmps[i]); + insn_tmps.clear(); + } +next:; + } + } + + void* do_translate() + { + unsigned processor; + switch(program.version.type) + { + case 0: + processor = TGSI_PROCESSOR_FRAGMENT; + break; + case 1: + processor = TGSI_PROCESSOR_VERTEX; + break; + case 2: + processor = TGSI_PROCESSOR_GEOMETRY; + break; + default: + fail("Tessellation and compute shaders not yet supported"); + return 0; + } + + if(!sm4_link_cf_insns(program)) + fail("Malformed control flow"); + if(!sm4_find_labels(program)) + fail("Failed to locate labels"); + if(!sm4_allocate_resource_sampler_pairs(program)) + fail("Unsupported (indirect?) accesses to resources and/or samplers"); + + ureg = ureg_create(processor); + + in_sub = false; + + for(unsigned i = 0; i < program.slot_to_resource.size(); ++i) + samplers.push_back(ureg_DECL_sampler(ureg, i)); + + sm4_to_tgsi_insn_num.resize(program.insns.size()); + for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num) + { + sm4_dcl& dcl = *program.dcls[insn_num]; + int idx = -1; + if(dcl.op.get() && dcl.op->is_index_simple(0)) + idx = dcl.op->indices[0].disp; + switch(dcl.opcode) + { + case SM4_OPCODE_DCL_GLOBAL_FLAGS: + break; + case SM4_OPCODE_DCL_TEMPS: + for(unsigned i = 0; i < dcl.num; ++i) + temps.push_back(ureg_DECL_temporary(ureg)); + break; + case SM4_OPCODE_DCL_INPUT: + check(idx >= 0); + if(processor == TGSI_PROCESSOR_VERTEX) + { + if(inputs.size() <= (unsigned)idx) + inputs.resize(idx + 1); + inputs[idx] = ureg_DECL_vs_input(ureg, idx); + } + else if(processor == TGSI_PROCESSOR_GEOMETRY) + { + // TODO: is this correct? + unsigned gsidx = dcl.op->indices[1].disp; + if(inputs.size() <= (unsigned)gsidx) + inputs.resize(gsidx + 1); + inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx); + } + else + check(0); + break; + case SM4_OPCODE_DCL_INPUT_PS: + check(idx >= 0); + if(inputs.size() <= (unsigned)idx) + inputs.resize(idx + 1); + inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid); + break; + case SM4_OPCODE_DCL_OUTPUT: + check(idx >= 0); + if(outputs.size() <= (unsigned)idx) + outputs.resize(idx + 1); + if(processor == TGSI_PROCESSOR_FRAGMENT) + outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx); + else + outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx); + break; + case SM4_OPCODE_DCL_INPUT_SIV: + case SM4_OPCODE_DCL_INPUT_SGV: + case SM4_OPCODE_DCL_INPUT_PS_SIV: + case SM4_OPCODE_DCL_INPUT_PS_SGV: + check(idx >= 0); + if(inputs.size() <= (unsigned)idx) + inputs.resize(idx + 1); + // TODO: is this correct? + inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0); + break; + case SM4_OPCODE_DCL_OUTPUT_SIV: + case SM4_OPCODE_DCL_OUTPUT_SGV: + check(idx >= 0); + if(outputs.size() <= (unsigned)idx) + outputs.resize(idx + 1); + check(sm4_to_pipe_sv[dcl.sv] >= 0); + outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0); + break; + case SM4_OPCODE_DCL_RESOURCE: + check(idx >= 0); + if(targets.size() <= (unsigned)idx) + targets.resize(idx + 1); + switch(dcl.dcl_resource.target) + { + case SM4_TARGET_TEXTURE1D: + targets[idx].first = TGSI_TEXTURE_1D; + targets[idx].second = TGSI_TEXTURE_SHADOW1D; + break; + case SM4_TARGET_TEXTURE2D: + targets[idx].first = TGSI_TEXTURE_2D; + targets[idx].second = TGSI_TEXTURE_SHADOW2D; + break; + case SM4_TARGET_TEXTURE3D: + targets[idx].first = TGSI_TEXTURE_3D; + targets[idx].second = 0; + break; + case SM4_TARGET_TEXTURECUBE: + targets[idx].first = TGSI_TEXTURE_CUBE; + targets[idx].second = 0; + break; + default: + // HACK to make SimpleSample10 work + //check(0); + targets[idx].first = TGSI_TEXTURE_2D; + targets[idx].second = TGSI_TEXTURE_SHADOW2D; + break; + } + break; + case SM4_OPCODE_DCL_SAMPLER: + check(idx >= 0); + if(sampler_modes.size() <= (unsigned)idx) + sampler_modes.resize(idx + 1); + check(!dcl.dcl_sampler.mono); + sampler_modes[idx] = dcl.dcl_sampler.shadow; + break; + case SM4_OPCODE_DCL_CONSTANT_BUFFER: + check(dcl.op->num_indices == 2); + check(dcl.op->is_index_simple(0)); + check(dcl.op->is_index_simple(1)); + idx = dcl.op->indices[0].disp; + ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx); + break; + case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: + ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]); + break; + case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]); + break; + case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + ureg_property_gs_max_vertices(ureg, dcl.num); + break; + default: + check(0); + } + } + + translate_insns(0, program.insns.size()); + sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg)); + if(in_sub) + ureg_ENDSUB(ureg); + else + ureg_END(ureg); + + for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i) + ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]); + + const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0); + ureg_destroy(ureg); + return (void*)tokens; + } + + void* translate() + { + try + { + return do_translate(); + } + catch(const char*) + { + return 0; + } + } +}; + +void* sm4_to_tgsi(struct sm4_program& program) +{ + sm4_to_tgsi_converter conv(program); + return conv.translate(); +} |