diff options
author | Rob Clark <[email protected]> | 2014-07-25 11:15:59 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-07-25 13:29:28 -0400 |
commit | db193e5ad06e7a2fbcffb3bb5df85d212eb12291 (patch) | |
tree | 58d1ec24c0af7b1acb1477eeaababe3d7eda6019 /src/gallium/drivers/freedreno/a3xx | |
parent | 7d7e6ae9c3544ce1889aa9b8a34545c6f42017e7 (diff) |
freedreno/ir3: split out shader compiler from a3xx
Move the bits we want to share between generations from fd3_program to
ir3_shader. So overall structure is:
fdN_shader_stateobj -> ir3_shader -> ir3_shader_variant -> ir3
|- ...
\- ir3_shader_variant -> ir3
So the ir3_shader becomes the topmost generation neutral object, which
manages the set of variants each of which generates, compiles, and
assembles it's own ir.
There is a bit of additional renaming to s/fd3_compiler/ir3_compiler/,
etc.
Keep the split between the gallium level stateobj and the shader helper
object because it might be a good idea to pre-compute some generation
specific register values (ie. anything that is independent of linking).
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
21 files changed, 55 insertions, 9429 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c deleted file mode 100644 index 8c3704bf658..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c +++ /dev/null @@ -1,805 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <string.h> -#include <assert.h> - -#include <util/u_debug.h> - -#include "disasm.h" -#include "instr-a3xx.h" - -static enum debug_t debug; - -#define printf debug_printf - -static const char *levels[] = { - "", - "\t", - "\t\t", - "\t\t\t", - "\t\t\t\t", - "\t\t\t\t\t", - "\t\t\t\t\t\t", - "\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t\t", - "x", - "x", - "x", - "x", - "x", - "x", -}; - -static const char *component = "xyzw"; - -static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", -}; - -static void print_reg(reg_t reg, bool full, bool r, bool c, bool im, - bool neg, bool abs, bool addr_rel) -{ - const char type = c ? 'c' : 'r'; - - // XXX I prefer - and || for neg/abs, but preserving format used - // by libllvm-a3xx for easy diffing.. - - if (abs && neg) - printf("(absneg)"); - else if (neg) - printf("(neg)"); - else if (abs) - printf("(abs)"); - - if (r) - printf("(r)"); - - if (im) { - printf("%d", reg.iim_val); - } else if (addr_rel) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - if (reg.iim_val < 0) - printf("%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val); - else if (reg.iim_val > 0) - printf("%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val); - else - printf("%s%c<a0.x>", full ? "" : "h", type); - } else if ((reg.num == REG_A0) && !c) { - printf("a0.%c", component[reg.comp]); - } else if ((reg.num == REG_P0) && !c) { - printf("p0.%c", component[reg.comp]); - } else { - printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); - } -} - - -/* current instruction repeat flag: */ -static unsigned repeat; - -static void print_reg_dst(reg_t reg, bool full, bool addr_rel) -{ - print_reg(reg, full, false, false, false, false, false, addr_rel); -} - -static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im, - bool neg, bool abs, bool addr_rel) -{ - print_reg(reg, full, r, c, im, neg, abs, addr_rel); -} - -static void print_instr_cat0(instr_t *instr) -{ - instr_cat0_t *cat0 = &instr->cat0; - - switch (cat0->opc) { - case OPC_KILL: - printf(" %sp0.%c", cat0->inv ? "!" : "", - component[cat0->comp]); - break; - case OPC_BR: - printf(" %sp0.%c, #%d", cat0->inv ? "!" : "", - component[cat0->comp], cat0->immed); - break; - case OPC_JUMP: - case OPC_CALL: - printf(" #%d", cat0->immed); - break; - } - - if ((debug & PRINT_VERBOSE) && (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4)) - printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4); -} - -static void print_instr_cat1(instr_t *instr) -{ - instr_cat1_t *cat1 = &instr->cat1; - - if (cat1->ul) - printf("(ul)"); - - if (cat1->src_type == cat1->dst_type) { - if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { - /* special case (nmemonic?): */ - printf("mova"); - } else { - printf("mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - } else { - printf("cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - - printf(" "); - - if (cat1->even) - printf("(even)"); - - if (cat1->pos_inf) - printf("(pos_infinity)"); - - print_reg_dst((reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); - - printf(", "); - - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) - printf("(%f)", cat1->fim_val); - else - printf("%d", cat1->iim_val); - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 'c' : 'r'; - if (cat1->off < 0) - printf("%c<a0.x - %d>", type, -cat1->off); - else if (cat1->off > 0) - printf("%c<a0.x + %d>", type, cat1->off); - else - printf("c<a0.x>"); - } else { - print_reg_src((reg_t)(cat1->src), type_size(cat1->src_type) == 32, - cat1->src_r, cat1->src_c, cat1->src_im, false, false, false); - } - - if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) - printf("\t{1: %x}", cat1->must_be_0); -} - -static void print_instr_cat2(instr_t *instr) -{ - instr_cat2_t *cat2 = &instr->cat2; - static const char *cond[] = { - "lt", - "le", - "gt", - "ge", - "eq", - "ne", - "?6?", - }; - - switch (cat2->opc) { - case OPC_CMPS_F: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_CMPV_F: - case OPC_CMPV_U: - case OPC_CMPV_S: - printf(".%s", cond[cat2->cond]); - break; - } - - printf(" "); - if (cat2->ei) - printf("(ei)"); - print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); - printf(", "); - - if (cat2->c1.src1_c) { - print_reg_src((reg_t)(cat2->c1.src1), cat2->full, cat2->src1_r, - cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } else if (cat2->rel1.src1_rel) { - print_reg_src((reg_t)(cat2->rel1.src1), cat2->full, cat2->src1_r, - cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, cat2->rel1.src1_rel); - } else { - print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r, - false, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } - - switch (cat2->opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - break; - default: - printf(", "); - if (cat2->c2.src2_c) { - print_reg_src((reg_t)(cat2->c2.src2), cat2->full, cat2->src2_r, - cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } else if (cat2->rel2.src2_rel) { - print_reg_src((reg_t)(cat2->rel2.src2), cat2->full, cat2->src2_r, - cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, cat2->rel2.src2_rel); - } else { - print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r, - false, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } - break; - } -} - -static void print_instr_cat3(instr_t *instr) -{ - instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); - - printf(" "); - print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false); - printf(", "); - if (cat3->c1.src1_c) { - print_reg_src((reg_t)(cat3->c1.src1), full, - cat3->src1_r, cat3->c1.src1_c, false, cat3->src1_neg, - false, false); - } else if (cat3->rel1.src1_rel) { - print_reg_src((reg_t)(cat3->rel1.src1), full, - cat3->src1_r, cat3->rel1.src1_c, false, cat3->src1_neg, - false, cat3->rel1.src1_rel); - } else { - print_reg_src((reg_t)(cat3->src1), full, - cat3->src1_r, false, false, cat3->src1_neg, - false, false); - } - printf(", "); - print_reg_src((reg_t)cat3->src2, full, - cat3->src2_r, cat3->src2_c, false, cat3->src2_neg, - false, false); - printf(", "); - if (cat3->c2.src3_c) { - print_reg_src((reg_t)(cat3->c2.src3), full, - cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg, - false, false); - } else if (cat3->rel2.src3_rel) { - print_reg_src((reg_t)(cat3->rel2.src3), full, - cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg, - false, cat3->rel2.src3_rel); - } else { - print_reg_src((reg_t)(cat3->src3), full, - cat3->src3_r, false, false, cat3->src3_neg, - false, false); - } -} - -static void print_instr_cat4(instr_t *instr) -{ - instr_cat4_t *cat4 = &instr->cat4; - - printf(" "); - print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); - printf(", "); - - if (cat4->c.src_c) { - print_reg_src((reg_t)(cat4->c.src), cat4->full, - cat4->src_r, cat4->c.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } else if (cat4->rel.src_rel) { - print_reg_src((reg_t)(cat4->rel.src), cat4->full, - cat4->src_r, cat4->rel.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, cat4->rel.src_rel); - } else { - print_reg_src((reg_t)(cat4->src), cat4->full, - cat4->src_r, false, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } - - if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) - printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); -} - -static void print_instr_cat5(instr_t *instr) -{ - static const struct { - bool src1, src2, samp, tex; - } info[0x1f] = { - [OPC_ISAM] = { true, false, true, true, }, - [OPC_ISAML] = { true, true, true, true, }, - [OPC_ISAMM] = { true, false, true, true, }, - [OPC_SAM] = { true, false, true, true, }, - [OPC_SAMB] = { true, true, true, true, }, - [OPC_SAML] = { true, true, true, true, }, - [OPC_SAMGQ] = { true, false, true, true, }, - [OPC_GETLOD] = { true, false, true, true, }, - [OPC_CONV] = { true, true, true, true, }, - [OPC_CONVM] = { true, true, true, true, }, - [OPC_GETSIZE] = { true, false, false, true, }, - [OPC_GETBUF] = { false, false, false, true, }, - [OPC_GETPOS] = { true, false, false, true, }, - [OPC_GETINFO] = { false, false, false, true, }, - [OPC_DSX] = { true, false, false, false, }, - [OPC_DSY] = { true, false, false, false, }, - [OPC_GATHER4R] = { true, false, true, true, }, - [OPC_GATHER4G] = { true, false, true, true, }, - [OPC_GATHER4B] = { true, false, true, true, }, - [OPC_GATHER4A] = { true, false, true, true, }, - [OPC_SAMGP0] = { true, false, true, true, }, - [OPC_SAMGP1] = { true, false, true, true, }, - [OPC_SAMGP2] = { true, false, true, true, }, - [OPC_SAMGP3] = { true, false, true, true, }, - [OPC_DSXPP_1] = { true, false, false, false, }, - [OPC_DSYPP_1] = { true, false, false, false, }, - [OPC_RGETPOS] = { false, false, false, false, }, - [OPC_RGETINFO] = { false, false, false, false, }, - }; - instr_cat5_t *cat5 = &instr->cat5; - int i; - - if (cat5->is_3d) printf(".3d"); - if (cat5->is_a) printf(".a"); - if (cat5->is_o) printf(".o"); - if (cat5->is_p) printf(".p"); - if (cat5->is_s) printf(".s"); - if (cat5->is_s2en) printf(".s2en"); - - printf(" "); - - switch (cat5->opc) { - case OPC_DSXPP_1: - case OPC_DSYPP_1: - break; - default: - printf("(%s)", type[cat5->type]); - break; - } - - printf("("); - for (i = 0; i < 4; i++) - if (cat5->wrmask & (1 << i)) - printf("%c", "xyzw"[i]); - printf(")"); - - print_reg_dst((reg_t)(cat5->dst), type_size(cat5->type) == 32, false); - - if (info[cat5->opc].src1) { - printf(", "); - print_reg_src((reg_t)(cat5->src1), cat5->full, false, false, false, - false, false, false); - } - - if (cat5->is_s2en) { - printf(", "); - print_reg_src((reg_t)(cat5->s2en.src2), cat5->full, false, false, false, - false, false, false); - printf(", "); - print_reg_src((reg_t)(cat5->s2en.src3), false, false, false, false, - false, false, false); - } else { - if (cat5->is_o || info[cat5->opc].src2) { - printf(", "); - print_reg_src((reg_t)(cat5->norm.src2), cat5->full, - false, false, false, false, false, false); - } - if (info[cat5->opc].samp) - printf(", s#%d", cat5->norm.samp); - if (info[cat5->opc].tex) - printf(", t#%d", cat5->norm.tex); - } - - if (debug & PRINT_VERBOSE) { - if (cat5->is_s2en) { - if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)) - printf("\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2); - } else { - if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2)) - printf("\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2); - } - } -} - -static int32_t u2i(uint32_t val, int nbits) -{ - return ((val >> (nbits-1)) * ~((1 << nbits) - 1)) | val; -} - -static void print_instr_cat6(instr_t *instr) -{ - instr_cat6_t *cat6 = &instr->cat6; - - printf(".%s ", type[cat6->type]); - - switch (cat6->opc) { - case OPC_LDG: - case OPC_LDP: - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - /* load instructions: */ - print_reg_dst((reg_t)(cat6->a.dst), type_size(cat6->type) == 32, false); - printf(","); - switch (cat6->opc) { - case OPC_LDG: - printf("g"); - break; - case OPC_LDP: - printf("p"); - break; - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - printf("l"); - break; - } - printf("["); - print_reg_src((reg_t)(cat6->a.src), true, - false, false, false, false, false, false); - if (cat6->a.off) - printf("%+d", cat6->a.off); - printf("]"); - break; - case OPC_PREFETCH: - /* similar to load instructions: */ - printf("g["); - print_reg_src((reg_t)(cat6->a.src), true, - false, false, false, false, false, false); - if (cat6->a.off) - printf("%+d", cat6->a.off); - printf("]"); - break; - case OPC_STG: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - /* store instructions: */ - switch (cat6->opc) { - case OPC_STG: - printf("g"); - break; - case OPC_STP: - printf("p"); - break; - case OPC_STL: - case OPC_STLW: - printf("l"); - break; - } - printf("["); - print_reg_dst((reg_t)(cat6->b.dst), true, false); - if (cat6->b.off || cat6->b.off_hi) - printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13)); - printf("]"); - printf(","); - print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32, - false, false, false, false, false, false); - - break; - case OPC_STI: - /* sti has same encoding as other store instructions, but - * slightly different syntax: - */ - print_reg_dst((reg_t)(cat6->b.dst), false /* XXX is it always half? */, false); - if (cat6->b.off || cat6->b.off_hi) - printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13)); - printf(","); - print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32, - false, false, false, false, false, false); - break; - } - - printf(", %d", cat6->iim_val); - - if (debug & PRINT_VERBOSE) { - switch (cat6->opc) { - case OPC_LDG: - case OPC_LDP: - /* load instructions: */ - if (cat6->a.dummy1|cat6->a.dummy2|cat6->a.dummy3) - printf("\t{6: %x,%x,%x}", cat6->a.dummy1, cat6->a.dummy2, cat6->a.dummy3); - if ((cat6->a.must_be_one1 != 1) || (cat6->a.must_be_one2 != 1)) - printf("{?? %d,%d ??}", cat6->a.must_be_one1, cat6->a.must_be_one2); - break; - case OPC_STG: - case OPC_STP: - case OPC_STI: - /* store instructions: */ - if (cat6->b.dummy1|cat6->b.dummy2) - printf("\t{6: %x,%x}", cat6->b.dummy1, cat6->b.dummy2); - if ((cat6->b.must_be_one1 != 1) || (cat6->b.must_be_one2 != 1) || - (cat6->b.must_be_zero1 != 0)) - printf("{?? %d,%d,%d ??}", cat6->b.must_be_one1, cat6->b.must_be_one2, - cat6->b.must_be_zero1); - break; - } - } -} - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -struct opc_info { - uint16_t cat; - uint16_t opc; - const char *name; - void (*print)(instr_t *instr); -} opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat } - /* category 0: */ - OPC(0, OPC_NOP, nop), - OPC(0, OPC_BR, br), - OPC(0, OPC_JUMP, jump), - OPC(0, OPC_CALL, call), - OPC(0, OPC_RET, ret), - OPC(0, OPC_KILL, kill), - OPC(0, OPC_END, end), - OPC(0, OPC_EMIT, emit), - OPC(0, OPC_CUT, cut), - OPC(0, OPC_CHMASK, chmask), - OPC(0, OPC_CHSH, chsh), - OPC(0, OPC_FLOW_REV, flow_rev), - - /* category 1: */ - OPC(1, 0, ), - - /* category 2: */ - OPC(2, OPC_ADD_F, add.f), - OPC(2, OPC_MIN_F, min.f), - OPC(2, OPC_MAX_F, max.f), - OPC(2, OPC_MUL_F, mul.f), - OPC(2, OPC_SIGN_F, sign.f), - OPC(2, OPC_CMPS_F, cmps.f), - OPC(2, OPC_ABSNEG_F, absneg.f), - OPC(2, OPC_CMPV_F, cmpv.f), - OPC(2, OPC_FLOOR_F, floor.f), - OPC(2, OPC_CEIL_F, ceil.f), - OPC(2, OPC_RNDNE_F, rndne.f), - OPC(2, OPC_RNDAZ_F, rndaz.f), - OPC(2, OPC_TRUNC_F, trunc.f), - OPC(2, OPC_ADD_U, add.u), - OPC(2, OPC_ADD_S, add.s), - OPC(2, OPC_SUB_U, sub.u), - OPC(2, OPC_SUB_S, sub.s), - OPC(2, OPC_CMPS_U, cmps.u), - OPC(2, OPC_CMPS_S, cmps.s), - OPC(2, OPC_MIN_U, min.u), - OPC(2, OPC_MIN_S, min.s), - OPC(2, OPC_MAX_U, max.u), - OPC(2, OPC_MAX_S, max.s), - OPC(2, OPC_ABSNEG_S, absneg.s), - OPC(2, OPC_AND_B, and.b), - OPC(2, OPC_OR_B, or.b), - OPC(2, OPC_NOT_B, not.b), - OPC(2, OPC_XOR_B, xor.b), - OPC(2, OPC_CMPV_U, cmpv.u), - OPC(2, OPC_CMPV_S, cmpv.s), - OPC(2, OPC_MUL_U, mul.u), - OPC(2, OPC_MUL_S, mul.s), - OPC(2, OPC_MULL_U, mull.u), - OPC(2, OPC_BFREV_B, bfrev.b), - OPC(2, OPC_CLZ_S, clz.s), - OPC(2, OPC_CLZ_B, clz.b), - OPC(2, OPC_SHL_B, shl.b), - OPC(2, OPC_SHR_B, shr.b), - OPC(2, OPC_ASHR_B, ashr.b), - OPC(2, OPC_BARY_F, bary.f), - OPC(2, OPC_MGEN_B, mgen.b), - OPC(2, OPC_GETBIT_B, getbit.b), - OPC(2, OPC_SETRM, setrm), - OPC(2, OPC_CBITS_B, cbits.b), - OPC(2, OPC_SHB, shb), - OPC(2, OPC_MSAD, msad), - - /* category 3: */ - OPC(3, OPC_MAD_U16, mad.u16), - OPC(3, OPC_MADSH_U16, madsh.u16), - OPC(3, OPC_MAD_S16, mad.s16), - OPC(3, OPC_MADSH_M16, madsh.m16), - OPC(3, OPC_MAD_U24, mad.u24), - OPC(3, OPC_MAD_S24, mad.s24), - OPC(3, OPC_MAD_F16, mad.f16), - OPC(3, OPC_MAD_F32, mad.f32), - OPC(3, OPC_SEL_B16, sel.b16), - OPC(3, OPC_SEL_B32, sel.b32), - OPC(3, OPC_SEL_S16, sel.s16), - OPC(3, OPC_SEL_S32, sel.s32), - OPC(3, OPC_SEL_F16, sel.f16), - OPC(3, OPC_SEL_F32, sel.f32), - OPC(3, OPC_SAD_S16, sad.s16), - OPC(3, OPC_SAD_S32, sad.s32), - - /* category 4: */ - OPC(4, OPC_RCP, rcp), - OPC(4, OPC_RSQ, rsq), - OPC(4, OPC_LOG2, log2), - OPC(4, OPC_EXP2, exp2), - OPC(4, OPC_SIN, sin), - OPC(4, OPC_COS, cos), - OPC(4, OPC_SQRT, sqrt), - - /* category 5: */ - OPC(5, OPC_ISAM, isam), - OPC(5, OPC_ISAML, isaml), - OPC(5, OPC_ISAMM, isamm), - OPC(5, OPC_SAM, sam), - OPC(5, OPC_SAMB, samb), - OPC(5, OPC_SAML, saml), - OPC(5, OPC_SAMGQ, samgq), - OPC(5, OPC_GETLOD, getlod), - OPC(5, OPC_CONV, conv), - OPC(5, OPC_CONVM, convm), - OPC(5, OPC_GETSIZE, getsize), - OPC(5, OPC_GETBUF, getbuf), - OPC(5, OPC_GETPOS, getpos), - OPC(5, OPC_GETINFO, getinfo), - OPC(5, OPC_DSX, dsx), - OPC(5, OPC_DSY, dsy), - OPC(5, OPC_GATHER4R, gather4r), - OPC(5, OPC_GATHER4G, gather4g), - OPC(5, OPC_GATHER4B, gather4b), - OPC(5, OPC_GATHER4A, gather4a), - OPC(5, OPC_SAMGP0, samgp0), - OPC(5, OPC_SAMGP1, samgp1), - OPC(5, OPC_SAMGP2, samgp2), - OPC(5, OPC_SAMGP3, samgp3), - OPC(5, OPC_DSXPP_1, dsxpp.1), - OPC(5, OPC_DSYPP_1, dsypp.1), - OPC(5, OPC_RGETPOS, rgetpos), - OPC(5, OPC_RGETINFO, rgetinfo), - - - /* category 6: */ - OPC(6, OPC_LDG, ldg), - OPC(6, OPC_LDL, ldl), - OPC(6, OPC_LDP, ldp), - OPC(6, OPC_STG, stg), - OPC(6, OPC_STL, stl), - OPC(6, OPC_STP, stp), - OPC(6, OPC_STI, sti), - OPC(6, OPC_G2L, g2l), - OPC(6, OPC_L2G, l2g), - OPC(6, OPC_PREFETCH, prefetch), - OPC(6, OPC_LDLW, ldlw), - OPC(6, OPC_STLW, stlw), - OPC(6, OPC_RESFMT, resfmt), - OPC(6, OPC_RESINFO, resinf), - OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l), - OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l), - OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l), - OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l), - OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l), - OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l), - OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l), - OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l), - OPC(6, OPC_ATOMIC_AND_L, atomic.and.l), - OPC(6, OPC_ATOMIC_OR_L, atomic.or.l), - OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l), - OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d), - OPC(6, OPC_STGB_4D_4, stgb.4d.4), - OPC(6, OPC_STIB, stib), - OPC(6, OPC_LDC_4, ldc.4), - OPC(6, OPC_LDLV, ldlv), - - -#undef OPC -}; - -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)])) - -// XXX hack.. probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) -{ - if (instr->category == -1) return "??meta??"; - return opcs[(instr->category << NOPC_BITS) | instr->opc].name; -} - -static void print_instr(uint32_t *dwords, int level, int n) -{ - instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr); - const char *name; - - printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]); - -#if 0 - /* print unknown bits: */ - if (debug & PRINT_RAW) - printf("[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000); - - if (debug & PRINT_VERBOSE) - printf("%d,%02d ", instr->opc_cat, opc); -#endif - - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ - - if (instr->sync) - printf("(sy)"); - if (instr->ss && (instr->opc_cat <= 4)) - printf("(ss)"); - if (instr->jmp_tgt) - printf("(jp)"); - if (instr->repeat && (instr->opc_cat <= 4)) { - printf("(rpt%d)", instr->repeat); - repeat = instr->repeat; - } else { - repeat = 0; - } - if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) - printf("(ul)"); - - name = GETINFO(instr)->name; - - if (name) { - printf("%s", name); - GETINFO(instr)->print(instr); - } else { - printf("unknown(%d,%d)", instr->opc_cat, opc); - } - - printf("\n"); -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type) -{ - int i; - - assert((sizedwords % 2) == 0); - - for (i = 0; i < sizedwords; i += 2) - print_instr(&dwords[i], level, i/2); - - return 0; -} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c deleted file mode 100644 index 0c22e55711b..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ /dev/null @@ -1,2638 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2013 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include <stdarg.h> - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_scan.h" - -#include "freedreno_lowering.h" - -#include "fd3_compiler.h" -#include "fd3_program.h" - -#include "instr-a3xx.h" -#include "ir3.h" - -struct fd3_compile_context { - const struct tgsi_token *tokens; - bool free_tokens; - struct ir3 *ir; - struct fd3_shader_variant *so; - - struct ir3_block *block; - struct ir3_instruction *current_instr; - - /* we need to defer updates to block->outputs[] until the end - * of an instruction (so we don't see new value until *after* - * the src registers are processed) - */ - struct { - struct ir3_instruction *instr, **instrp; - } output_updates[16]; - unsigned num_output_updates; - - /* are we in a sequence of "atomic" instructions? - */ - bool atomic; - - /* For fragment shaders, from the hw perspective the only - * actual input is r0.xy position register passed to bary.f. - * But TGSI doesn't know that, it still declares things as - * IN[] registers. So we do all the input tracking normally - * and fix things up after compile_instructions() - * - * NOTE that frag_pos is the hardware position (possibly it - * is actually an index or tag or some such.. it is *not* - * values that can be directly used for gl_FragCoord..) - */ - struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4]; - - struct tgsi_parse_context parser; - unsigned type; - - struct tgsi_shader_info info; - - /* for calculating input/output positions/linkages: */ - unsigned next_inloc; - - unsigned num_internal_temps; - struct tgsi_src_register internal_temps[6]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - /* stack of branch instructions that mark (potentially nested) - * branch if/else/loop/etc - */ - struct { - struct ir3_instruction *instr, *cond; - bool inv; /* true iff in else leg of branch */ - } branch[16]; - unsigned int branch_count; - - /* list of kill instructions: */ - struct ir3_instruction *kill[16]; - unsigned int kill_count; - - /* used when dst is same as one of the src, to avoid overwriting a - * src element before the remaining scalar instructions that make - * up the vector operation - */ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; -}; - - -static void vectorize(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, struct tgsi_dst_register *dst, - int nsrcs, ...); -static void create_mov(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *src); -static type_t get_ftype(struct fd3_compile_context *ctx); - -static unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, - const struct tgsi_token *tokens) -{ - unsigned ret; - struct tgsi_shader_info *info = &ctx->info; - const struct fd_lowering_config lconfig = { - .color_two_side = so->key.color_two_side, - .lower_DST = true, - .lower_XPD = true, - .lower_SCS = true, - .lower_LRP = true, - .lower_FRC = true, - .lower_POW = true, - .lower_LIT = true, - .lower_EXP = true, - .lower_LOG = true, - .lower_DP4 = true, - .lower_DP3 = true, - .lower_DPH = true, - .lower_DP2 = true, - .lower_DP2A = true, - }; - - ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info); - ctx->free_tokens = !!ctx->tokens; - if (!ctx->tokens) { - /* no lowering */ - ctx->tokens = tokens; - } - ctx->ir = so->ir; - ctx->so = so; - ctx->next_inloc = 8; - ctx->num_internal_temps = 0; - ctx->branch_count = 0; - ctx->kill_count = 0; - ctx->block = NULL; - ctx->current_instr = NULL; - ctx->num_output_updates = 0; - ctx->atomic = false; - ctx->frag_pos = NULL; - ctx->frag_face = NULL; - - memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord)); - -#define FM(x) (1 << TGSI_FILE_##x) - /* optimize can't deal with relative addressing: */ - if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT))) - return TGSI_PARSE_ERROR; - - /* Immediates go after constants: */ - so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1; - ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); - - ret = tgsi_parse_init(&ctx->parser, ctx->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - - return ret; -} - -static void -compile_error(struct fd3_compile_context *ctx, const char *format, ...) -{ - va_list ap; - va_start(ap, format); - _debug_vprintf(format, ap); - va_end(ap); - tgsi_dump(ctx->tokens, 0); - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void -compile_free(struct fd3_compile_context *ctx) -{ - if (ctx->free_tokens) - free((void *)ctx->tokens); - tgsi_parse_free(&ctx->parser); -} - -struct instr_translater { - void (*fxn)(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst); - unsigned tgsi_opc; - opc_t opc; - opc_t hopc; /* opc to use for half_precision mode, if different */ - unsigned arg; -}; - -static void -instr_finish(struct fd3_compile_context *ctx) -{ - unsigned i; - - if (ctx->atomic) - return; - - for (i = 0; i < ctx->num_output_updates; i++) - *(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr; - - ctx->num_output_updates = 0; -} - -/* For "atomic" groups of instructions, for example the four scalar - * instructions to perform a vec4 operation. Basically this just - * blocks out handling of output_updates so the next scalar instruction - * still sees the result from before the start of the atomic group. - * - * NOTE: when used properly, this could probably replace get/put_dst() - * stuff. - */ -static void -instr_atomic_start(struct fd3_compile_context *ctx) -{ - ctx->atomic = true; -} - -static void -instr_atomic_end(struct fd3_compile_context *ctx) -{ - ctx->atomic = false; - instr_finish(ctx); -} - -static struct ir3_instruction * -instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) -{ - instr_finish(ctx); - return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc)); -} - -static struct ir3_instruction * -instr_clone(struct fd3_compile_context *ctx, struct ir3_instruction *instr) -{ - instr_finish(ctx); - return (ctx->current_instr = ir3_instr_clone(instr)); -} - -static struct ir3_block * -push_block(struct fd3_compile_context *ctx) -{ - struct ir3_block *block; - unsigned ntmp, nin, nout; - -#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1)) - - /* hmm, give ourselves room to create 4 extra temporaries (vec4): - */ - ntmp = SCALAR_REGS(TEMPORARY); - ntmp += 4 * 4; - - nout = SCALAR_REGS(OUTPUT); - nin = SCALAR_REGS(INPUT); - - /* for outermost block, 'inputs' are the actual shader INPUT - * register file. Reads from INPUT registers always go back to - * top block. For nested blocks, 'inputs' is used to track any - * TEMPORARY file register from one of the enclosing blocks that - * is ready in this block. - */ - if (!ctx->block) { - /* NOTE: fragment shaders actually have two inputs (r0.xy, the - * position) - */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - int n = 2; - if (ctx->info.reads_position) - n += 4; - if (ctx->info.uses_frontface) - n += 4; - nin = MAX2(n, nin); - nout += ARRAY_SIZE(ctx->kill); - } - } else { - nin = ntmp; - } - - block = ir3_block_create(ctx->ir, ntmp, nin, nout); - - if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block) - block->noutputs -= ARRAY_SIZE(ctx->kill); - - block->parent = ctx->block; - ctx->block = block; - - return block; -} - -static void -pop_block(struct fd3_compile_context *ctx) -{ - ctx->block = ctx->block->parent; - compile_assert(ctx, ctx->block); -} - -static struct ir3_instruction * -create_output(struct ir3_block *block, struct ir3_instruction *instr, - unsigned n) -{ - struct ir3_instruction *out; - - out = ir3_instr_create(block, -1, OPC_META_OUTPUT); - out->inout.block = block; - ir3_reg_create(out, n, 0); - if (instr) - ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr; - - return out; -} - -static struct ir3_instruction * -create_input(struct ir3_block *block, struct ir3_instruction *instr, - unsigned n) -{ - struct ir3_instruction *in; - - in = ir3_instr_create(block, -1, OPC_META_INPUT); - in->inout.block = block; - ir3_reg_create(in, n, 0); - if (instr) - ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr; - - return in; -} - -static struct ir3_instruction * -block_input(struct ir3_block *block, unsigned n) -{ - /* references to INPUT register file always go back up to - * top level: - */ - if (block->parent) - return block_input(block->parent, n); - return block->inputs[n]; -} - -/* return temporary in scope, creating if needed meta-input node - * to track block inputs - */ -static struct ir3_instruction * -block_temporary(struct ir3_block *block, unsigned n) -{ - /* references to TEMPORARY register file, find the nearest - * enclosing block which has already assigned this temporary, - * creating meta-input instructions along the way to keep - * track of block inputs - */ - if (block->parent && !block->temporaries[n]) { - /* if already have input for this block, reuse: */ - if (!block->inputs[n]) - block->inputs[n] = block_temporary(block->parent, n); - - /* and create new input to return: */ - return create_input(block, block->inputs[n], n); - } - return block->temporaries[n]; -} - -static struct ir3_instruction * -create_immed(struct fd3_compile_context *ctx, float val) -{ - /* NOTE: *don't* use instr_create() here! - */ - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val; - return instr; -} - -static void -ssa_dst(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - unsigned n = regid(dst->Index, chan); - unsigned idx = ctx->num_output_updates; - - compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates)); - - /* NOTE: defer update of temporaries[idx] or output[idx] - * until instr_finish(), so that if the current instruction - * reads the same TEMP/OUT[] it gets the old value: - * - * bleh.. this might be a bit easier to just figure out - * in instr_finish(). But at that point we've already - * lost information about OUTPUT vs TEMPORARY register - * file.. - */ - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - compile_assert(ctx, n < ctx->block->noutputs); - ctx->output_updates[idx].instrp = &ctx->block->outputs[n]; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - case TGSI_FILE_TEMPORARY: - compile_assert(ctx, n < ctx->block->ntemporaries); - ctx->output_updates[idx].instrp = &ctx->block->temporaries[n]; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - case TGSI_FILE_ADDRESS: - compile_assert(ctx, n < 1); - ctx->output_updates[idx].instrp = &ctx->block->address; - ctx->output_updates[idx].instr = instr; - ctx->num_output_updates++; - break; - } -} - -static void -ssa_src(struct fd3_compile_context *ctx, struct ir3_register *reg, - const struct tgsi_src_register *src, unsigned chan) -{ - struct ir3_block *block = ctx->block; - unsigned n = regid(src->Index, chan); - - switch (src->File) { - case TGSI_FILE_INPUT: - reg->flags |= IR3_REG_SSA; - reg->instr = block_input(ctx->block, n); - break; - case TGSI_FILE_OUTPUT: - /* really this should just happen in case of 'MOV_SAT OUT[n], ..', - * for the following clamp instructions: - */ - reg->flags |= IR3_REG_SSA; - reg->instr = block->outputs[n]; - /* we don't have to worry about read from an OUTPUT that was - * assigned outside of the current block, because the _SAT - * clamp instructions will always be in the same block as - * the original instruction which wrote the OUTPUT - */ - compile_assert(ctx, reg->instr); - break; - case TGSI_FILE_TEMPORARY: - reg->flags |= IR3_REG_SSA; - reg->instr = block_temporary(ctx->block, n); - break; - } - - if ((reg->flags & IR3_REG_SSA) && !reg->instr) { - /* this can happen when registers (or components of a TGSI - * register) are used as src before they have been assigned - * (undefined contents). To avoid confusing the rest of the - * compiler, and to generally keep things peachy, substitute - * an instruction that sets the src to 0.0. Or to keep - * things undefined, I could plug in a random number? :-P - * - * NOTE: *don't* use instr_create() here! - */ - reg->instr = create_immed(ctx, 0.0); - } -} - -static struct ir3_register * -add_dst_reg_wrmask(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, const struct tgsi_dst_register *dst, - unsigned chan, unsigned wrmask) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - case TGSI_FILE_TEMPORARY: - /* uses SSA */ - break; - case TGSI_FILE_ADDRESS: - flags |= IR3_REG_ADDR; - /* uses SSA */ - break; - default: - compile_error(ctx, "unsupported dst register file: %s\n", - tgsi_file_name(dst->File)); - break; - } - - if (dst->Indirect) - flags |= IR3_REG_RELATIV; - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - /* NOTE: do not call ssa_dst() if atomic.. vectorize() - * itself will call ssa_dst(). This is to filter out - * the (initially bogus) .x component dst which is - * created (but not necessarily used, ie. if the net - * result of the vector operation does not write to - * the .x component) - */ - - reg->wrmask = wrmask; - if (wrmask == 0x1) { - /* normal case */ - if (!ctx->atomic) - ssa_dst(ctx, instr, dst, chan); - } else if ((dst->File == TGSI_FILE_TEMPORARY) || - (dst->File == TGSI_FILE_OUTPUT) || - (dst->File == TGSI_FILE_ADDRESS)) { - unsigned i; - - /* if instruction writes multiple, we need to create - * some place-holder collect the registers: - */ - for (i = 0; i < 4; i++) { - if (wrmask & (1 << i)) { - struct ir3_instruction *collect = - ir3_instr_create(ctx->block, -1, OPC_META_FO); - collect->fo.off = i; - /* unused dst reg: */ - ir3_reg_create(collect, 0, 0); - /* and src reg used to hold original instr */ - ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = instr; - if (!ctx->atomic) - ssa_dst(ctx, collect, dst, chan+i); - } - } - } - - return reg; -} - -static struct ir3_register * -add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1); -} - -static struct ir3_register * -add_src_reg_wrmask(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, const struct tgsi_src_register *src, - unsigned chan, unsigned wrmask) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - struct ir3_instruction *orig = NULL; - - /* TODO we need to use a mov to temp for const >= 64.. or maybe - * we could use relative addressing.. - */ - compile_assert(ctx, src->Index < 64); - - switch (src->File) { - case TGSI_FILE_IMMEDIATE: - /* TODO if possible, use actual immediate instead of const.. but - * TGSI has vec4 immediates, we can only embed scalar (of limited - * size, depending on instruction..) - */ - flags |= IR3_REG_CONST; - num = src->Index + ctx->so->first_immediate; - break; - case TGSI_FILE_CONSTANT: - flags |= IR3_REG_CONST; - num = src->Index; - break; - case TGSI_FILE_OUTPUT: - /* NOTE: we should only end up w/ OUTPUT file for things like - * clamp()'ing saturated dst instructions - */ - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - /* uses SSA */ - break; - default: - compile_error(ctx, "unsupported src register file: %s\n", - tgsi_file_name(src->File)); - break; - } - - if (src->Absolute) - flags |= IR3_REG_ABS; - if (src->Negate) - flags |= IR3_REG_NEGATE; - - if (src->Indirect) { - flags |= IR3_REG_RELATIV; - - /* shouldn't happen, and we can't cope with it below: */ - compile_assert(ctx, wrmask == 0x1); - - /* wrap in a meta-deref to track both the src and address: */ - orig = instr; - - instr = ir3_instr_create(ctx->block, -1, OPC_META_DEREF); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->block->address; - } - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - reg->wrmask = wrmask; - if (wrmask == 0x1) { - /* normal case */ - ssa_src(ctx, reg, src, chan); - } else if ((src->File == TGSI_FILE_TEMPORARY) || - (src->File == TGSI_FILE_OUTPUT) || - (src->File == TGSI_FILE_INPUT)) { - struct ir3_instruction *collect; - unsigned i; - - compile_assert(ctx, !src->Indirect); - - /* if instruction reads multiple, we need to create - * some place-holder collect the registers: - */ - collect = ir3_instr_create(ctx->block, -1, OPC_META_FI); - ir3_reg_create(collect, 0, 0); /* unused dst reg */ - - for (i = 0; i < 4; i++) { - if (wrmask & (1 << i)) { - /* and src reg used point to the original instr */ - ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), - src, chan + i); - } else if (wrmask & ~((i << i) - 1)) { - /* if any remaining components, then dummy - * placeholder src reg to fill in the blanks: - */ - ir3_reg_create(collect, 0, 0); - } - } - - reg->flags |= IR3_REG_SSA; - reg->instr = collect; - } - - if (src->Indirect) { - reg = ir3_reg_create(orig, 0, flags | IR3_REG_SSA); - reg->instr = instr; - } - return reg; -} - -static struct ir3_register * -add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_src_register *src, unsigned chan) -{ - return add_src_reg_wrmask(ctx, instr, src, chan, 0x1); -} - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI op. - */ -static struct tgsi_src_register * -get_internal_temp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -static inline bool -is_const(struct tgsi_src_register *src) -{ - return (src->File == TGSI_FILE_CONSTANT) || - (src->File == TGSI_FILE_IMMEDIATE); -} - -static inline bool -is_relative(struct tgsi_src_register *src) -{ - return src->Indirect; -} - -static inline bool -is_rel_or_const(struct tgsi_src_register *src) -{ - return is_relative(src) || is_const(src); -} - -static type_t -get_ftype(struct fd3_compile_context *ctx) -{ - return TYPE_F32; -} - -static type_t -get_utype(struct fd3_compile_context *ctx) -{ - return TYPE_U32; -} - -static unsigned -src_swiz(struct tgsi_src_register *src, int chan) -{ - switch (chan) { - case 0: return src->SwizzleX; - case 1: return src->SwizzleY; - case 2: return src->SwizzleZ; - case 3: return src->SwizzleW; - } - assert(0); - return 0; -} - -/* for instructions that cannot take a const register as src, if needed - * generate a move to temporary gpr: - */ -static struct tgsi_src_register * -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) -{ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - compile_assert(ctx, is_rel_or_const(src)); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - create_mov(ctx, &tmp_dst, src); - - return tmp_src; -} - -static void -get_immediate(struct fd3_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->immediates_count = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -static void -create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - type_t type_mov = get_ftype(ctx); - unsigned i; - - for (i = 0; i < 4; i++) { - /* move to destination: */ - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *instr; - - if (src->Absolute || src->Negate) { - /* can't have abs or neg on a mov instr, so use - * absneg.f instead to handle these cases: - */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - } else { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - } - - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src_swiz(src, i)); - } - } -} - -static void -create_clamp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *val, - struct tgsi_src_register *minval, struct tgsi_src_register *maxval) -{ - struct ir3_instruction *instr; - - instr = instr_create(ctx, 2, OPC_MAX_F); - vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - - instr = instr_create(ctx, 2, OPC_MIN_F); - vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); -} - -static void -create_clamp_imm(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, - uint32_t minval, uint32_t maxval) -{ - struct tgsi_src_register minconst, maxconst; - struct tgsi_src_register src; - - src_from_dst(&src, dst); - - get_immediate(ctx, &minconst, minval); - get_immediate(ctx, &maxconst, maxval); - - create_clamp(ctx, dst, &src, &minconst, &maxconst); -} - -static struct tgsi_dst_register * -get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - unsigned i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_src_register *src = &inst->Src[i].Register; - if ((src->File == dst->File) && (src->Index == dst->Index)) { - if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) && - (src->SwizzleX == TGSI_SWIZZLE_X) && - (src->SwizzleY == TGSI_SWIZZLE_Y) && - (src->SwizzleZ == TGSI_SWIZZLE_Z) && - (src->SwizzleW == TGSI_SWIZZLE_W)) - continue; - ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); - ctx->tmp_dst.WriteMask = dst->WriteMask; - dst = &ctx->tmp_dst; - break; - } - } - return dst; -} - -static void -put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, - struct tgsi_dst_register *dst) -{ - /* if necessary, add mov back into original dst: */ - if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); - } -} - -/* helper to generate the necessary repeat and/or additional instructions - * to turn a scalar instruction into a vector operation: - */ -static void -vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - struct tgsi_dst_register *dst, int nsrcs, ...) -{ - va_list ap; - int i, j, n = 0; - - instr_atomic_start(ctx); - - add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X); - - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - struct ir3_register *reg; - if (flags & IR3_REG_IMMED) { - reg = ir3_reg_create(instr, 0, IR3_REG_IMMED); - /* this is an ugly cast.. should have put flags first! */ - reg->iim_val = *(int *)&src; - } else { - reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X); - } - reg->flags |= flags & ~IR3_REG_NEGATE; - if (flags & IR3_REG_NEGATE) - reg->flags ^= IR3_REG_NEGATE; - } - va_end(ap); - - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *cur; - - if (n++ == 0) { - cur = instr; - } else { - cur = instr_clone(ctx, instr); - } - - ssa_dst(ctx, cur, dst, i); - - /* fix-up dst register component: */ - cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i); - - /* fix-up src register component: */ - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct ir3_register *reg = cur->regs[j+1]; - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - if (reg->flags & IR3_REG_SSA) { - ssa_src(ctx, reg, src, src_swiz(src, i)); - } else if (!(flags & IR3_REG_IMMED)) { - reg->num = regid(reg->num >> 2, src_swiz(src, i)); - } - } - va_end(ap); - } - } - - instr_atomic_end(ctx); -} - -/* - * Handlers for TGSI instructions which do not have a 1:1 mapping to - * native instructions: - */ - -static void -trans_clamp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct tgsi_src_register *src2 = &inst->Src[2].Register; - - create_clamp(ctx, dst, src0, src1, src2); - - put_dst(ctx, inst, dst); -} - -/* ARL(x) = x, but mova from hrN.x to a0.. */ -static void -trans_arl(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - unsigned chan = src->SwizzleX; - - compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS); - - /* NOTE: we allocate a temporary from a flat register - * namespace (ignoring half vs full). It turns out - * not to really matter since registers get reassigned - * later in ir3_ra which (hopefully!) can deal a bit - * better with mixed half and full precision. - */ - tmp_src = get_internal_temp(ctx, &tmp_dst); - - /* cov.f{32,16}s16 Rtmp, Rsrc */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, src, chan); - - /* shl.b Rtmp, Rtmp, 2 */ - instr = instr_create(ctx, 2, OPC_SHL_B); - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - - /* mova a0, Rtmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_S16; - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; -} - -/* - * texture fetch/sample instructions: - */ - -struct tex_info { - int8_t order[4]; - unsigned src_wrmask, flags; -}; - -static const struct tex_info * -get_tex_info(struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - static const struct tex_info tex1d = { - .order = { 0, -1, -1, -1 }, /* coord.x */ - .src_wrmask = TGSI_WRITEMASK_XY, - .flags = 0, - }; - static const struct tex_info tex1ds = { - .order = { 0, -1, 2, -1 }, /* coord.xz */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_S, - }; - static const struct tex_info tex2d = { - .order = { 0, 1, -1, -1 }, /* coord.xy */ - .src_wrmask = TGSI_WRITEMASK_XY, - .flags = 0, - }; - static const struct tex_info tex2ds = { - .order = { 0, 1, 2, -1 }, /* coord.xyz */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_S, - }; - static const struct tex_info tex3d = { - .order = { 0, 1, 2, -1 }, /* coord.xyz */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_3D, - }; - static const struct tex_info tex3ds = { - .order = { 0, 1, 2, 3 }, /* coord.xyzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_S | IR3_INSTR_3D, - }; - static const struct tex_info txp1d = { - .order = { 0, -1, 3, -1 }, /* coord.xw */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_P, - }; - static const struct tex_info txp1ds = { - .order = { 0, -1, 2, 3 }, /* coord.xzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_P | IR3_INSTR_S, - }; - static const struct tex_info txp2d = { - .order = { 0, 1, 3, -1 }, /* coord.xyw */ - .src_wrmask = TGSI_WRITEMASK_XYZ, - .flags = IR3_INSTR_P, - }; - static const struct tex_info txp2ds = { - .order = { 0, 1, 2, 3 }, /* coord.xyzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_P | IR3_INSTR_S, - }; - static const struct tex_info txp3d = { - .order = { 0, 1, 2, 3 }, /* coord.xyzw */ - .src_wrmask = TGSI_WRITEMASK_XYZW, - .flags = IR3_INSTR_P | IR3_INSTR_3D, - }; - - unsigned tex = inst->Texture.Texture; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_TEX: - switch (tex) { - case TGSI_TEXTURE_1D: - return &tex1d; - case TGSI_TEXTURE_SHADOW1D: - return &tex1ds; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - return &tex2d; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - return &tex2ds; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - return &tex3d; - case TGSI_TEXTURE_SHADOWCUBE: - return &tex3ds; - default: - compile_error(ctx, "unknown texture type: %s\n", - tgsi_texture_names[tex]); - return NULL; - } - break; - case TGSI_OPCODE_TXP: - switch (tex) { - case TGSI_TEXTURE_1D: - return &txp1d; - case TGSI_TEXTURE_SHADOW1D: - return &txp1ds; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - return &txp2d; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - return &txp2ds; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - return &txp3d; - default: - compile_error(ctx, "unknown texture type: %s\n", - tgsi_texture_names[tex]); - break; - } - break; - } - compile_assert(ctx, 0); - return NULL; -} - -static struct tgsi_src_register * -get_tex_coord(struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst, - const struct tex_info *tinf) -{ - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned tex = inst->Texture.Texture; - bool needs_mov = false; - unsigned i; - - /* cat5 instruction cannot seem to handle const or relative: */ - if (is_rel_or_const(coord)) - needs_mov = true; - - /* 1D textures we fix up w/ 0.0 as 2nd coord: */ - if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D)) - needs_mov = true; - - /* The texture sample instructions need to coord in successive - * registers/components (ie. src.xy but not src.yx). And TXP - * needs the .w component in .z for 2D.. so in some cases we - * might need to emit some mov instructions to shuffle things - * around: - */ - for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++) - if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i])) - needs_mov = true; - - if (needs_mov) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - unsigned j; - - type_t type_mov = get_ftype(ctx); - - /* need to move things around: */ - tmp_src = get_internal_temp(ctx, &tmp_dst); - - for (j = 0; j < 4; j++) { - if (tinf->order[j] < 0) - continue; - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, j); - add_src_reg(ctx, instr, coord, - src_swiz(coord, tinf->order[j])); - } - - /* fix up .y coord: */ - if ((tex == TGSI_TEXTURE_1D) || - (tex == TGSI_TEXTURE_SHADOW1D)) { - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5; - } - - coord = tmp_src; - } - - return coord; -} - -static void -trans_samp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *coord; - struct tgsi_src_register *samp = &inst->Src[1].Register; - const struct tex_info *tinf; - - tinf = get_tex_info(ctx, inst); - coord = get_tex_coord(ctx, inst, tinf); - - instr = instr_create(ctx, 5, t->opc); - instr->cat5.type = get_ftype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= tinf->flags; - - add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask); - add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask); -} - -/* - * SEQ(a,b) = (a == b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SNE(a,b) = (a != b) ? 1.0 : 0.0 - * cmps.f.ne tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SGE(a,b) = (a >= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLE(a,b) = (a <= b) ? 1.0 : 0.0 - * cmps.f.le tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SGT(a,b) = (a > b) ? 1.0 : 0.0 - * cmps.f.gt tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLT(a,b) = (a < b) ? 1.0 : 0.0 - * cmps.f.lt tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * CMP(a,b,c) = (a < 0.0) ? b : c - * cmps.f.lt tmp0, a, {0.0} - * sel.b16 dst, b, tmp0, c - */ -static void -trans_cmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval0; - /* final instruction for CMP() uses orig src1 and src2: */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a0, *a1, *a2; - unsigned condition; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_FSEQ: - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_FSNE: - condition = IR3_COND_NE; - break; - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_FSGE: - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_FSLT: - condition = IR3_COND_LT; - break; - case TGSI_OPCODE_SLE: - condition = IR3_COND_LE; - break; - case TGSI_OPCODE_SGT: - condition = IR3_COND_GT; - break; - case TGSI_OPCODE_CMP: - get_immediate(ctx, &constval0, fui(0.0)); - a0 = &inst->Src[0].Register; /* a */ - a1 = &constval0; /* {0.0} */ - condition = IR3_COND_LT; - break; - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - /* cmps.f.<cond> tmp, a0, a1 */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_FSEQ: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_FSGE: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_FSNE: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_FSLT: - /* cov.u16f16 dst, tmp0 */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, dst, 1, tmp_src, 0); - break; - case TGSI_OPCODE_CMP: - a1 = &inst->Src[1].Register; - a2 = &inst->Src[2].Register; - /* sel.{b32,b16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); - - break; - } - - put_dst(ctx, inst, dst); -} - -/* - * USNE(a,b) = (a != b) ? 1 : 0 - * cmps.u32.ne dst, a, b - * - * USEQ(a,b) = (a == b) ? 1 : 0 - * cmps.u32.eq dst, a, b - * - * ISGE(a,b) = (a > b) ? 1 : 0 - * cmps.s32.ge dst, a, b - * - * USGE(a,b) = (a > b) ? 1 : 0 - * cmps.u32.ge dst, a, b - * - * ISLT(a,b) = (a < b) ? 1 : 0 - * cmps.s32.lt dst, a, b - * - * USLT(a,b) = (a < b) ? 1 : 0 - * cmps.u32.lt dst, a, b - * - * UCMP(a,b,c) = (a < 0) ? b : c - * cmps.u32.lt tmp0, a, {0} - * sel.b16 dst, b, tmp0, c - */ -static void -trans_icmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register constval0; - struct tgsi_src_register *a0, *a1, *a2; - unsigned condition; - - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - - switch (t->tgsi_opc) { - case TGSI_OPCODE_USNE: - condition = IR3_COND_NE; - break; - case TGSI_OPCODE_USEQ: - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_USGE: - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_USLT: - condition = IR3_COND_LT; - break; - case TGSI_OPCODE_UCMP: - get_immediate(ctx, &constval0, 0); - a0 = &inst->Src[0].Register; /* a */ - a1 = &constval0; /* {0} */ - condition = IR3_COND_LT; - break; - - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - if (t->tgsi_opc == TGSI_OPCODE_UCMP) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - tmp_src = get_internal_temp(ctx, &tmp_dst); - /* cmps.u32.lt tmp, a0, a1 */ - instr = instr_create(ctx, 2, t->opc); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - a1 = &inst->Src[1].Register; - a2 = &inst->Src[2].Register; - /* sel.{b32,b16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, OPC_SEL_B32); - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); - } else { - /* cmps.{u32,s32}.<cond> dst, a0, a1 */ - instr = instr_create(ctx, 2, t->opc); - instr->cat2.condition = condition; - vectorize(ctx, instr, dst, 2, a0, 0, a1, 0); - } - put_dst(ctx, inst, dst); -} - -/* - * Conditional / Flow control - */ - -static void -push_branch(struct fd3_compile_context *ctx, bool inv, - struct ir3_instruction *instr, struct ir3_instruction *cond) -{ - unsigned int idx = ctx->branch_count++; - compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch)); - ctx->branch[idx].instr = instr; - ctx->branch[idx].inv = inv; - /* else side of branch has same condition: */ - if (!inv) - ctx->branch[idx].cond = cond; -} - -static struct ir3_instruction * -pop_branch(struct fd3_compile_context *ctx) -{ - unsigned int idx = --ctx->branch_count; - return ctx->branch[idx].instr; -} - -static void -trans_if(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr, *cond; - struct tgsi_src_register *src = &inst->Src[0].Register; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval; - - get_immediate(ctx, &constval, fui(0.0)); - tmp_src = get_internal_temp(ctx, &tmp_dst); - - if (is_const(src)) - src = get_unconst(ctx, src); - - /* cmps.f.ne tmp0, b, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src, src->SwizzleX); - add_src_reg(ctx, instr, &constval, constval.SwizzleX); - instr->cat2.condition = IR3_COND_NE; - - compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */ - cond = instr->regs[1]->instr; - - /* meta:flow tmp0 */ - instr = instr_create(ctx, -1, OPC_META_FLOW); - ir3_reg_create(instr, 0, 0); /* dummy dst */ - add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); - - push_branch(ctx, false, instr, cond); - instr->flow.if_block = push_block(ctx); -} - -static void -trans_else(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - - pop_block(ctx); - - instr = pop_branch(ctx); - - compile_assert(ctx, (instr->category == -1) && - (instr->opc == OPC_META_FLOW)); - - push_branch(ctx, true, instr, NULL); - instr->flow.else_block = push_block(ctx); -} - -static struct ir3_instruction * -find_temporary(struct ir3_block *block, unsigned n) -{ - if (block->parent && !block->temporaries[n]) - return find_temporary(block->parent, n); - return block->temporaries[n]; -} - -static struct ir3_instruction * -find_output(struct ir3_block *block, unsigned n) -{ - if (block->parent && !block->outputs[n]) - return find_output(block->parent, n); - return block->outputs[n]; -} - -static struct ir3_instruction * -create_phi(struct fd3_compile_context *ctx, struct ir3_instruction *cond, - struct ir3_instruction *a, struct ir3_instruction *b) -{ - struct ir3_instruction *phi; - - compile_assert(ctx, cond); - - /* Either side of the condition could be null.. which - * indicates a variable written on only one side of the - * branch. Normally this should only be variables not - * used outside of that side of the branch. So we could - * just 'return a ? a : b;' in that case. But for better - * defined undefined behavior we just stick in imm{0.0}. - * In the common case of a value only used within the - * one side of the branch, the PHI instruction will not - * get scheduled - */ - if (!a) - a = create_immed(ctx, 0.0); - if (!b) - b = create_immed(ctx, 0.0); - - phi = instr_create(ctx, -1, OPC_META_PHI); - ir3_reg_create(phi, 0, 0); /* dummy dst */ - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond; - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a; - ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b; - - return phi; -} - -static void -trans_endif(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct ir3_block *ifb, *elseb; - struct ir3_instruction **ifout, **elseout; - unsigned i, ifnout = 0, elsenout = 0; - - pop_block(ctx); - - instr = pop_branch(ctx); - - compile_assert(ctx, (instr->category == -1) && - (instr->opc == OPC_META_FLOW)); - - ifb = instr->flow.if_block; - elseb = instr->flow.else_block; - /* if there is no else block, the parent block is used for the - * branch-not-taken src of the PHI instructions: - */ - if (!elseb) - elseb = ifb->parent; - - /* worst case sizes: */ - ifnout = ifb->ntemporaries + ifb->noutputs; - elsenout = elseb->ntemporaries + elseb->noutputs; - - ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout); - if (elseb != ifb->parent) - elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout); - - ifnout = 0; - elsenout = 0; - - /* generate PHI instructions for any temporaries written: */ - for (i = 0; i < ifb->ntemporaries; i++) { - struct ir3_instruction *a = ifb->temporaries[i]; - struct ir3_instruction *b = elseb->temporaries[i]; - - /* if temporary written in if-block, or if else block - * is present and temporary written in else-block: - */ - if (a || ((elseb != ifb->parent) && b)) { - struct ir3_instruction *phi; - - /* if only written on one side, find the closest - * enclosing update on other side: - */ - if (!a) - a = find_temporary(ifb, i); - if (!b) - b = find_temporary(elseb, i); - - ifout[ifnout] = a; - a = create_output(ifb, a, ifnout++); - - if (elseb != ifb->parent) { - elseout[elsenout] = b; - b = create_output(elseb, b, elsenout++); - } - - phi = create_phi(ctx, instr, a, b); - ctx->block->temporaries[i] = phi; - } - } - - compile_assert(ctx, ifb->noutputs == elseb->noutputs); - - /* .. and any outputs written: */ - for (i = 0; i < ifb->noutputs; i++) { - struct ir3_instruction *a = ifb->outputs[i]; - struct ir3_instruction *b = elseb->outputs[i]; - - /* if output written in if-block, or if else block - * is present and output written in else-block: - */ - if (a || ((elseb != ifb->parent) && b)) { - struct ir3_instruction *phi; - - /* if only written on one side, find the closest - * enclosing update on other side: - */ - if (!a) - a = find_output(ifb, i); - if (!b) - b = find_output(elseb, i); - - ifout[ifnout] = a; - a = create_output(ifb, a, ifnout++); - - if (elseb != ifb->parent) { - elseout[elsenout] = b; - b = create_output(elseb, b, elsenout++); - } - - phi = create_phi(ctx, instr, a, b); - ctx->block->outputs[i] = phi; - } - } - - ifb->noutputs = ifnout; - ifb->outputs = ifout; - - if (elseb != ifb->parent) { - elseb->noutputs = elsenout; - elseb->outputs = elseout; - } - - // TODO maybe we want to compact block->inputs? -} - -/* - * Kill - */ - -static void -trans_kill(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr, *immed, *cond = NULL; - bool inv = false; - - switch (t->tgsi_opc) { - case TGSI_OPCODE_KILL: - /* unconditional kill, use enclosing if condition: */ - if (ctx->branch_count > 0) { - unsigned int idx = ctx->branch_count - 1; - cond = ctx->branch[idx].cond; - inv = ctx->branch[idx].inv; - } else { - cond = create_immed(ctx, 1.0); - } - - break; - } - - compile_assert(ctx, cond); - - immed = create_immed(ctx, 0.0); - - /* cmps.f.ne p0.x, cond, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_NE; - ir3_reg_create(instr, regid(REG_P0, 0), 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; - cond = instr; - - /* kill p0.x */ - instr = instr_create(ctx, 0, OPC_KILL); - instr->cat0.inv = inv; - ir3_reg_create(instr, 0, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - - ctx->kill[ctx->kill_count++] = instr; -} - -/* - * Kill-If - */ - -static void -trans_killif(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr, *immed, *cond = NULL; - bool inv = false; - - immed = create_immed(ctx, 0.0); - - /* cmps.f.ne p0.x, cond, {0.0} */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_NE; - ir3_reg_create(instr, regid(REG_P0, 0), 0); - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; - add_src_reg(ctx, instr, src, src->SwizzleX); - - cond = instr; - - /* kill p0.x */ - instr = instr_create(ctx, 0, OPC_KILL); - instr->cat0.inv = inv; - ir3_reg_create(instr, 0, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; - - ctx->kill[ctx->kill_count++] = instr; - -} -/* - * I2F / U2F / F2I / F2U - */ - -static void -trans_cov(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - - // cov.f32s32 dst, tmp0 / - instr = instr_create(ctx, 1, 0); - switch (t->tgsi_opc) { - case TGSI_OPCODE_U2F: - instr->cat1.src_type = TYPE_U32; - instr->cat1.dst_type = TYPE_F32; - break; - case TGSI_OPCODE_I2F: - instr->cat1.src_type = TYPE_S32; - instr->cat1.dst_type = TYPE_F32; - break; - case TGSI_OPCODE_F2U: - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_U32; - break; - case TGSI_OPCODE_F2I: - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_S32; - break; - - } - vectorize(ctx, instr, dst, 1, src, 0); -} - -/* - * Handlers for TGSI instructions which do have 1:1 mapping to native - * instructions: - */ - -static void -instr_cat0(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - instr_create(ctx, 0, t->opc); -} - -static void -instr_cat1(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - create_mov(ctx, dst, src); - put_dst(ctx, inst, dst); -} - -static void -instr_cat2(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - unsigned src0_flags = 0, src1_flags = 0; - - switch (t->tgsi_opc) { - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_IABS: - src0_flags = IR3_REG_ABS; - break; - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_INEG: - src1_flags = IR3_REG_NEGATE; - break; - } - - switch (t->opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 1, src0, src0_flags); - break; - default: - if (is_const(src0) && is_const(src1)) - src0 = get_unconst(ctx, src0); - - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 2, src0, src0_flags, - src1, src1_flags); - break; - } - - put_dst(ctx, inst, dst); -} - -static void -instr_cat3(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - - /* in particular, can't handle const for src1 for cat3.. - * for mad, we can swap first two src's if needed: - */ - if (is_rel_or_const(src1)) { - if (is_mad(t->opc) && !is_rel_or_const(src0)) { - struct tgsi_src_register *tmp; - tmp = src0; - src0 = src1; - src1 = tmp; - } else { - src1 = get_unconst(ctx, src1); - } - } - - instr = instr_create(ctx, 3, t->opc); - vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, - &inst->Src[2].Register, 0); - put_dst(ctx, inst, dst); -} - -static void -instr_cat4(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned i; - - /* seems like blob compiler avoids const as src.. */ - if (is_const(src)) - src = get_unconst(ctx, src); - - /* we need to replicate into each component: */ - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - instr = instr_create(ctx, 4, t->opc); - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src->SwizzleX); - } - } - - put_dst(ctx, inst, dst); -} - -static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { -#define INSTR(n, f, ...) \ - [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } - - INSTR(MOV, instr_cat1), - INSTR(RCP, instr_cat4, .opc = OPC_RCP), - INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), - INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), - INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), - INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), - INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), - INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), - INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), - INSTR(UADD, instr_cat2, .opc = OPC_ADD_U), - INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S), - INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U), - INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S), - INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U), - INSTR(AND, instr_cat2, .opc = OPC_AND_B), - INSTR(OR, instr_cat2, .opc = OPC_OR_B), - INSTR(NOT, instr_cat2, .opc = OPC_NOT_B), - INSTR(XOR, instr_cat2, .opc = OPC_XOR_B), - INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U), - INSTR(SHL, instr_cat2, .opc = OPC_SHL_B), - INSTR(USHR, instr_cat2, .opc = OPC_SHR_B), - INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B), - INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S), - INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S), - INSTR(AND, instr_cat2, .opc = OPC_AND_B), - INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), - INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), - INSTR(CLAMP, trans_clamp), - INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F), - INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F), - INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F), - INSTR(ARL, trans_arl), - INSTR(EX2, instr_cat4, .opc = OPC_EXP2), - INSTR(LG2, instr_cat4, .opc = OPC_LOG2), - INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), - INSTR(COS, instr_cat4, .opc = OPC_COS), - INSTR(SIN, instr_cat4, .opc = OPC_SIN), - INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), - INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), - INSTR(SGT, trans_cmp), - INSTR(SLT, trans_cmp), - INSTR(FSLT, trans_cmp), - INSTR(SGE, trans_cmp), - INSTR(FSGE, trans_cmp), - INSTR(SLE, trans_cmp), - INSTR(SNE, trans_cmp), - INSTR(FSNE, trans_cmp), - INSTR(SEQ, trans_cmp), - INSTR(FSEQ, trans_cmp), - INSTR(CMP, trans_cmp), - INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U), - INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U), - INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S), - INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U), - INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S), - INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U), - INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U), - INSTR(IF, trans_if), - INSTR(UIF, trans_if), - INSTR(ELSE, trans_else), - INSTR(ENDIF, trans_endif), - INSTR(END, instr_cat0, .opc = OPC_END), - INSTR(KILL, trans_kill, .opc = OPC_KILL), - INSTR(KILL_IF, trans_killif, .opc = OPC_KILL), - INSTR(I2F, trans_cov), - INSTR(U2F, trans_cov), - INSTR(F2I, trans_cov), - INSTR(F2U, trans_cov), -}; - -static fd3_semantic -decl_semantic(const struct tgsi_declaration_semantic *sem) -{ - return fd3_semantic_name(sem->Name, sem->Index); -} - -static struct ir3_instruction * -decl_in_frag_bary(struct fd3_compile_context *ctx, unsigned regid, - unsigned j, unsigned inloc) -{ - struct ir3_instruction *instr; - struct ir3_register *src; - - /* bary.f dst, #inloc, r0.x */ - instr = instr_create(ctx, 2, OPC_BARY_F); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; - src = ir3_reg_create(instr, 0, IR3_REG_SSA); - src->wrmask = 0x3; - src->instr = ctx->frag_pos; - - return instr; -} - -/* TGSI_SEMANTIC_POSITION - * """""""""""""""""""""" - * - * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that - * fragment shader input contains the fragment's window position. The X - * component starts at zero and always increases from left to right. - * The Y component starts at zero and always increases but Y=0 may either - * indicate the top of the window or the bottom depending on the fragment - * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN). - * The Z coordinate ranges from 0 to 1 to represent depth from the front - * to the back of the Z buffer. The W component contains the reciprocol - * of the interpolated vertex position W component. - */ -static struct ir3_instruction * -decl_in_frag_coord(struct fd3_compile_context *ctx, unsigned regid, - unsigned j) -{ - struct ir3_instruction *instr, *src; - - compile_assert(ctx, !ctx->frag_coord[j]); - - ctx->frag_coord[j] = create_input(ctx->block, NULL, 0); - - - switch (j) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - */ - - /* add.s tmp, src, -8 */ - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j]; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8; - src = instr; - - /* shr.b tmp, tmp, 4 */ - instr = instr_create(ctx, 2, OPC_SHR_B); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4; - src = instr; - - /* mov.u32f32 dst, tmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_U32; - instr->cat1.dst_type = TYPE_F32; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - - break; - case 2: /* .z */ - case 3: /* .w */ - /* seems that we can use these as-is: */ - instr = ctx->frag_coord[j]; - break; - default: - compile_error(ctx, "invalid channel\n"); - instr = create_immed(ctx, 0.0); - break; - } - - return instr; -} - -/* TGSI_SEMANTIC_FACE - * """""""""""""""""" - * - * This label applies to fragment shader inputs only and indicates that - * the register contains front/back-face information of the form (F, 0, - * 0, 1). The first component will be positive when the fragment belongs - * to a front-facing polygon, and negative when the fragment belongs to a - * back-facing polygon. - */ -static struct ir3_instruction * -decl_in_frag_face(struct fd3_compile_context *ctx, unsigned regid, - unsigned j) -{ - struct ir3_instruction *instr, *src; - - switch (j) { - case 0: /* .x */ - compile_assert(ctx, !ctx->frag_face); - - ctx->frag_face = create_input(ctx->block, NULL, 0); - - /* for faceness, we always get -1 or 0 (int).. but TGSI expects - * positive vs negative float.. and piglit further seems to - * expect -1.0 or 1.0: - * - * mul.s tmp, hr0.x, 2 - * add.s tmp, tmp, 1 - * mov.s16f32, dst, tmp - * - */ - - instr = instr_create(ctx, 2, OPC_MUL_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - src = instr; - - instr = instr_create(ctx, 2, OPC_ADD_S); - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; - src = instr; - - instr = instr_create(ctx, 1, 0); /* mov */ - instr->cat1.src_type = TYPE_S32; - instr->cat1.dst_type = TYPE_F32; - ir3_reg_create(instr, regid, 0); /* dummy dst */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - - break; - case 1: /* .y */ - case 2: /* .z */ - instr = create_immed(ctx, 0.0); - break; - case 3: /* .w */ - instr = create_immed(ctx, 1.0); - break; - default: - compile_error(ctx, "invalid channel\n"); - instr = create_immed(ctx, 0.0); - break; - } - - return instr; -} - -static void -decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct fd3_shader_variant *so = ctx->so; - unsigned name = decl->Semantic.Name; - unsigned i; - - /* I don't think we should get frag shader input without - * semantic info? Otherwise how do inputs get linked to - * vert outputs? - */ - compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || - decl->Declaration.Semantic); - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->inputs_count++; - unsigned r = regid(i, 0); - unsigned ncomp, j; - - /* we'll figure out the actual components used after scheduling */ - ncomp = 4; - - DBG("decl in -> r%d", i); - - compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); - - so->inputs[n].semantic = decl_semantic(&decl->Semantic); - so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs[n].regid = r; - so->inputs[n].inloc = ctx->next_inloc; - - for (j = 0; j < ncomp; j++) { - struct ir3_instruction *instr = NULL; - - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - /* for fragment shaders, POSITION and FACE are handled - * specially, not using normal varying / bary.f - */ - if (name == TGSI_SEMANTIC_POSITION) { - so->inputs[n].bary = false; - so->frag_coord = true; - instr = decl_in_frag_coord(ctx, r + j, j); - } else if (name == TGSI_SEMANTIC_FACE) { - so->inputs[n].bary = false; - so->frag_face = true; - instr = decl_in_frag_face(ctx, r + j, j); - } else { - so->inputs[n].bary = true; - instr = decl_in_frag_bary(ctx, r + j, j, - so->inputs[n].inloc + j - 8); - } - } else { - instr = create_input(ctx->block, NULL, (i * 4) + j); - } - - ctx->block->inputs[(i * 4) + j] = instr; - } - - if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) { - ctx->next_inloc += ncomp; - so->total_in += ncomp; - } - } -} - -static void -decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct fd3_shader_variant *so = ctx->so; - unsigned comp = 0; - unsigned name = decl->Semantic.Name; - unsigned i; - - compile_assert(ctx, decl->Declaration.Semantic); - - DBG("decl out[%d] -> r%d", name, decl->Range.First); - - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - so->writes_pos = true; - break; - case TGSI_SEMANTIC_PSIZE: - so->writes_psize = true; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_TEXCOORD: - break; - default: - compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } else { - switch (name) { - case TGSI_SEMANTIC_POSITION: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; - case TGSI_SEMANTIC_COLOR: - break; - default: - compile_error(ctx, "unknown FS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->outputs_count++; - unsigned ncomp, j; - - ncomp = 4; - - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - - so->outputs[n].semantic = decl_semantic(&decl->Semantic); - so->outputs[n].regid = regid(i, comp); - - /* avoid undefined outputs, stick a dummy mov from imm{0.0}, - * which if the output is actually assigned will be over- - * written - */ - for (j = 0; j < ncomp; j++) - ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0); - } -} - -/* from TGSI perspective, we actually have inputs. But most of the "inputs" - * for a fragment shader are just bary.f instructions. The *actual* inputs - * from the hw perspective are the frag_pos and optionally frag_coord and - * frag_face. - */ -static void -fixup_frag_inputs(struct fd3_compile_context *ctx) -{ - struct fd3_shader_variant *so = ctx->so; - struct ir3_block *block = ctx->block; - struct ir3_instruction **inputs; - struct ir3_instruction *instr; - int n, regid = 0; - - block->ninputs = 0; - - n = 4; /* always have frag_pos */ - n += COND(so->frag_face, 4); - n += COND(so->frag_coord, 4); - - inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); - - if (so->frag_face) { - /* this ultimately gets assigned to hr0.x so doesn't conflict - * with frag_coord/frag_pos.. - */ - inputs[block->ninputs++] = ctx->frag_face; - ctx->frag_face->regs[0]->num = 0; - - /* remaining channels not used, but let's avoid confusing - * other parts that expect inputs to come in groups of vec4 - */ - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - } - - /* since we don't know where to set the regid for frag_coord, - * we have to use r0.x for it. But we don't want to *always* - * use r1.x for frag_pos as that could increase the register - * footprint on simple shaders: - */ - if (so->frag_coord) { - ctx->frag_coord[0]->regs[0]->num = regid++; - ctx->frag_coord[1]->regs[0]->num = regid++; - ctx->frag_coord[2]->regs[0]->num = regid++; - ctx->frag_coord[3]->regs[0]->num = regid++; - - inputs[block->ninputs++] = ctx->frag_coord[0]; - inputs[block->ninputs++] = ctx->frag_coord[1]; - inputs[block->ninputs++] = ctx->frag_coord[2]; - inputs[block->ninputs++] = ctx->frag_coord[3]; - } - - /* we always have frag_pos: */ - so->pos_regid = regid; - - /* r0.x */ - instr = create_input(block, NULL, block->ninputs); - instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; - ctx->frag_pos->regs[1]->instr = instr; - - /* r0.y */ - instr = create_input(block, NULL, block->ninputs); - instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; - ctx->frag_pos->regs[2]->instr = instr; - - block->inputs = inputs; -} - -static void -compile_instructions(struct fd3_compile_context *ctx) -{ - push_block(ctx); - - /* for fragment shader, we have a single input register (usually - * r0.xy) which is used as the base for bary.f varying fetch instrs: - */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, -1, OPC_META_FI); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ - ctx->frag_pos = instr; - } - - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - if (decl->Declaration.File == TGSI_FILE_OUTPUT) { - decl_out(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - decl_in(ctx, decl); - } - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - /* TODO: if we know the immediate is small enough, and only - * used with instructions that can embed an immediate, we - * can skip this: - */ - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->immediates_count++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates)); - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *inst = - &ctx->parser.FullToken.FullInstruction; - unsigned opc = inst->Instruction.Opcode; - const struct instr_translater *t = &translaters[opc]; - - if (t->fxn) { - t->fxn(t, ctx, inst); - ctx->num_internal_temps = 0; - } else { - compile_error(ctx, "unknown TGSI opc: %s\n", - tgsi_get_opcode_name(opc)); - } - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_ZERO_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(0.0), fui(1.0)); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(-1.0), fui(1.0)); - break; - } - - instr_finish(ctx); - - break; - } - default: - break; - } - } -} - -static void -compile_dump(struct fd3_compile_context *ctx) -{ - const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag"; - static unsigned n = 0; - char fname[16]; - FILE *f; - snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++); - f = fopen(fname, "w"); - if (!f) - return; - ir3_block_depth(ctx->block); - ir3_dump(ctx->ir, name, ctx->block, f); - fclose(f); -} - -int -fd3_compile_shader(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, struct fd3_shader_key key) -{ - struct fd3_compile_context ctx; - struct ir3_block *block; - struct ir3_instruction **inputs; - unsigned i, j, actual_in; - int ret = 0; - - assert(!so->ir); - - so->ir = ir3_create(); - - assert(so->ir); - - if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) { - ret = -1; - goto out; - } - - compile_instructions(&ctx); - - block = ctx.block; - - /* keep track of the inputs from TGSI perspective.. */ - inputs = block->inputs; - - /* but fixup actual inputs for frag shader: */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) - fixup_frag_inputs(&ctx); - - /* at this point, for binning pass, throw away unneeded outputs: */ - if (key.binning_pass) { - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned name = sem2name(so->outputs[i].semantic); - unsigned idx = sem2name(so->outputs[i].semantic); - - /* throw away everything but first position/psize */ - if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) || - (name == TGSI_SEMANTIC_PSIZE))) { - if (i != j) { - so->outputs[j] = so->outputs[i]; - block->outputs[(j*4)+0] = block->outputs[(i*4)+0]; - block->outputs[(j*4)+1] = block->outputs[(i*4)+1]; - block->outputs[(j*4)+2] = block->outputs[(i*4)+2]; - block->outputs[(j*4)+3] = block->outputs[(i*4)+3]; - } - j++; - } - } - so->outputs_count = j; - block->noutputs = j * 4; - } - - /* at this point, we want the kill's in the outputs array too, - * so that they get scheduled (since they have no dst).. we've - * already ensured that the array is big enough in push_block(): - */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { - for (i = 0; i < ctx.kill_count; i++) - block->outputs[block->noutputs++] = ctx.kill[i]; - } - - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(&ctx); - - ret = ir3_block_flatten(block); - if (ret < 0) - goto out; - if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP)) - compile_dump(&ctx); - - ir3_block_cp(block); - - if (fd_mesa_debug & FD_DBG_OPTDUMP) - compile_dump(&ctx); - - ir3_block_depth(block); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER DEPTH:\n"); - ir3_dump_instr_list(block->head); - } - - ir3_block_sched(block); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER SCHED:\n"); - ir3_dump_instr_list(block->head); - } - - ret = ir3_block_ra(block, so->type, key.half_precision, - so->frag_coord, so->frag_face, &so->has_samp); - if (ret) - goto out; - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER RA:\n"); - ir3_dump_instr_list(block->head); - } - - /* fixup input/outputs: */ - for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; - /* preserve hack for depth output.. tgsi writes depth to .z, - * but what we give the hw is the scalar register: - */ - if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) && - (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION)) - so->outputs[i].regid += 2; - } - /* Note that some or all channels of an input may be unused: */ - actual_in = 0; - for (i = 0; i < so->inputs_count; i++) { - unsigned j, regid = ~0, compmask = 0; - so->inputs[i].ncomp = 0; - for (j = 0; j < 4; j++) { - struct ir3_instruction *in = inputs[(i*4) + j]; - if (in) { - compmask |= (1 << j); - regid = in->regs[0]->num - j; - actual_in++; - so->inputs[i].ncomp++; - } - } - so->inputs[i].regid = regid; - so->inputs[i].compmask = compmask; - } - - /* fragment shader always gets full vec4's even if it doesn't - * fetch all components, but vertex shader we need to update - * with the actual number of components fetch, otherwise thing - * will hang due to mismaptch between VFD_DECODE's and - * TOTALATTRTOVS - */ - if (so->type == SHADER_VERTEX) - so->total_in = actual_in; - -out: - if (ret) { - ir3_destroy(so->ir); - so->ir = NULL; - } - compile_free(&ctx); - - return ret; -} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h deleted file mode 100644 index a53bb3ee9a5..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2013 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#ifndef FD3_COMPILER_H_ -#define FD3_COMPILER_H_ - -#include "fd3_program.h" -#include "fd3_util.h" - - -int fd3_compile_shader(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, - struct fd3_shader_key key); -int fd3_compile_shader_old(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, - struct fd3_shader_key key); - -#endif /* FD3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c deleted file mode 100644 index 66f724b35c0..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ /dev/null @@ -1,1524 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2013 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include <stdarg.h> - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_scan.h" - -#include "freedreno_lowering.h" - -#include "fd3_compiler.h" -#include "fd3_program.h" -#include "fd3_util.h" - -#include "instr-a3xx.h" -#include "ir3.h" - - -struct fd3_compile_context { - const struct tgsi_token *tokens; - bool free_tokens; - struct ir3 *ir; - struct ir3_block *block; - struct fd3_shader_variant *so; - - struct tgsi_parse_context parser; - unsigned type; - - struct tgsi_shader_info info; - - /* last input dst (for setting (ei) flag): */ - struct ir3_register *last_input; - - /* last instruction with relative addressing: */ - struct ir3_instruction *last_rel; - - /* for calculating input/output positions/linkages: */ - unsigned next_inloc; - - unsigned num_internal_temps; - struct tgsi_src_register internal_temps[6]; - - /* track registers which need to synchronize w/ "complex alu" cat3 - * instruction pipeline: - */ - regmask_t needs_ss; - - /* track registers which need to synchronize with texture fetch - * pipeline: - */ - regmask_t needs_sy; - - /* inputs start at r0, temporaries start after last input, and - * outputs start after last temporary. - * - * We could be more clever, because this is not a hw restriction, - * but probably best just to implement an optimizing pass to - * reduce the # of registers used and get rid of redundant mov's - * (to output register). - */ - unsigned base_reg[TGSI_FILE_COUNT]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - /* stack of branch instructions that start (potentially nested) - * branch instructions, so that we can fix up the branch targets - * so that we can fix up the branch target on the corresponding - * END instruction - */ - struct ir3_instruction *branch[16]; - unsigned int branch_count; - - /* used when dst is same as one of the src, to avoid overwriting a - * src element before the remaining scalar instructions that make - * up the vector operation - */ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; -}; - - -static void vectorize(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, struct tgsi_dst_register *dst, - int nsrcs, ...); -static void create_mov(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *src); - -static unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, - const struct tgsi_token *tokens) -{ - unsigned ret, base = 0; - struct tgsi_shader_info *info = &ctx->info; - const struct fd_lowering_config lconfig = { - .color_two_side = so->key.color_two_side, - .lower_DST = true, - .lower_XPD = true, - .lower_SCS = true, - .lower_LRP = true, - .lower_FRC = true, - .lower_POW = true, - .lower_LIT = true, - .lower_EXP = true, - .lower_LOG = true, - .lower_DP4 = true, - .lower_DP3 = true, - .lower_DPH = true, - .lower_DP2 = true, - .lower_DP2A = true, - }; - - ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info); - ctx->free_tokens = !!ctx->tokens; - if (!ctx->tokens) { - /* no lowering */ - ctx->tokens = tokens; - } - ctx->ir = so->ir; - ctx->block = ir3_block_create(ctx->ir, 0, 0, 0); - ctx->so = so; - ctx->last_input = NULL; - ctx->last_rel = NULL; - ctx->next_inloc = 8; - ctx->num_internal_temps = 0; - ctx->branch_count = 0; - - regmask_init(&ctx->needs_ss); - regmask_init(&ctx->needs_sy); - memset(ctx->base_reg, 0, sizeof(ctx->base_reg)); - - /* Immediates go after constants: */ - ctx->base_reg[TGSI_FILE_CONSTANT] = 0; - ctx->base_reg[TGSI_FILE_IMMEDIATE] = - info->file_max[TGSI_FILE_CONSTANT] + 1; - - /* if full precision and fragment shader, don't clobber - * r0.x w/ bary fetch: - */ - if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision) - base = 1; - - /* Temporaries after outputs after inputs: */ - ctx->base_reg[TGSI_FILE_INPUT] = base; - ctx->base_reg[TGSI_FILE_OUTPUT] = base + - info->file_max[TGSI_FILE_INPUT] + 1; - ctx->base_reg[TGSI_FILE_TEMPORARY] = base + - info->file_max[TGSI_FILE_INPUT] + 1 + - info->file_max[TGSI_FILE_OUTPUT] + 1; - - so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; - ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); - - ret = tgsi_parse_init(&ctx->parser, ctx->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - - return ret; -} - -static void -compile_error(struct fd3_compile_context *ctx, const char *format, ...) -{ - va_list ap; - va_start(ap, format); - _debug_vprintf(format, ap); - va_end(ap); - tgsi_dump(ctx->tokens, 0); - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void -compile_free(struct fd3_compile_context *ctx) -{ - if (ctx->free_tokens) - free((void *)ctx->tokens); - tgsi_parse_free(&ctx->parser); -} - -struct instr_translater { - void (*fxn)(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst); - unsigned tgsi_opc; - opc_t opc; - opc_t hopc; /* opc to use for half_precision mode, if different */ - unsigned arg; -}; - -static void -handle_last_rel(struct fd3_compile_context *ctx) -{ - if (ctx->last_rel) { - ctx->last_rel->flags |= IR3_INSTR_UL; - ctx->last_rel = NULL; - } -} - -static struct ir3_instruction * -instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) -{ - return ir3_instr_create(ctx->block, category, opc); -} - -static void -add_nop(struct fd3_compile_context *ctx, unsigned count) -{ - while (count-- > 0) - instr_create(ctx, 0, OPC_NOP); -} - -static unsigned -src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) -{ - unsigned flags = 0; - - if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return flags; - - if (regmask_get(&ctx->needs_ss, reg)) { - flags |= IR3_INSTR_SS; - regmask_init(&ctx->needs_ss); - } - - if (regmask_get(&ctx->needs_sy, reg)) { - flags |= IR3_INSTR_SY; - regmask_init(&ctx->needs_sy); - } - - return flags; -} - -static struct ir3_register * -add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - case TGSI_FILE_TEMPORARY: - num = dst->Index + ctx->base_reg[dst->File]; - break; - case TGSI_FILE_ADDRESS: - num = REG_A0; - break; - default: - compile_error(ctx, "unsupported dst register file: %s\n", - tgsi_file_name(dst->File)); - break; - } - - if (dst->Indirect) - flags |= IR3_REG_RELATIV; - if (ctx->so->key.half_precision) - flags |= IR3_REG_HALF; - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - if (dst->Indirect) - ctx->last_rel = instr; - - return reg; -} - -static struct ir3_register * -add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_src_register *src, unsigned chan) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - /* TODO we need to use a mov to temp for const >= 64.. or maybe - * we could use relative addressing.. - */ - compile_assert(ctx, src->Index < 64); - - switch (src->File) { - case TGSI_FILE_IMMEDIATE: - /* TODO if possible, use actual immediate instead of const.. but - * TGSI has vec4 immediates, we can only embed scalar (of limited - * size, depending on instruction..) - */ - case TGSI_FILE_CONSTANT: - flags |= IR3_REG_CONST; - num = src->Index + ctx->base_reg[src->File]; - break; - case TGSI_FILE_OUTPUT: - /* NOTE: we should only end up w/ OUTPUT file for things like - * clamp()'ing saturated dst instructions - */ - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - num = src->Index + ctx->base_reg[src->File]; - break; - default: - compile_error(ctx, "unsupported src register file: %s\n", - tgsi_file_name(src->File)); - break; - } - - if (src->Absolute) - flags |= IR3_REG_ABS; - if (src->Negate) - flags |= IR3_REG_NEGATE; - if (src->Indirect) - flags |= IR3_REG_RELATIV; - if (ctx->so->key.half_precision) - flags |= IR3_REG_HALF; - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - if (src->Indirect) - ctx->last_rel = instr; - - instr->flags |= src_flags(ctx, reg); - - return reg; -} - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI op. - */ -static struct tgsi_src_register * -get_internal_temp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -/* Get internal half-precision temp src/dst to use for a sequence of - * instructions generated by a single TGSI op. - */ -static struct tgsi_src_register * -get_internal_temp_hr(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - if (ctx->so->key.half_precision) - return get_internal_temp(ctx, tmp_dst); - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - /* just use hr0 because no one else should be using half- - * precision regs: - */ - tmp_dst->Index = 0; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -static inline bool -is_const(struct tgsi_src_register *src) -{ - return (src->File == TGSI_FILE_CONSTANT) || - (src->File == TGSI_FILE_IMMEDIATE); -} - -static inline bool -is_relative(struct tgsi_src_register *src) -{ - return src->Indirect; -} - -static inline bool -is_rel_or_const(struct tgsi_src_register *src) -{ - return is_relative(src) || is_const(src); -} - -static type_t -get_ftype(struct fd3_compile_context *ctx) -{ - return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32; -} - -static type_t -get_utype(struct fd3_compile_context *ctx) -{ - return ctx->so->key.half_precision ? TYPE_U16 : TYPE_U32; -} - -static unsigned -src_swiz(struct tgsi_src_register *src, int chan) -{ - switch (chan) { - case 0: return src->SwizzleX; - case 1: return src->SwizzleY; - case 2: return src->SwizzleZ; - case 3: return src->SwizzleW; - } - assert(0); - return 0; -} - -/* for instructions that cannot take a const register as src, if needed - * generate a move to temporary gpr: - */ -static struct tgsi_src_register * -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) -{ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - compile_assert(ctx, is_rel_or_const(src)); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - create_mov(ctx, &tmp_dst, src); - - return tmp_src; -} - -static void -get_immediate(struct fd3_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->immediates_count = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -static void -create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - type_t type_mov = get_ftype(ctx); - unsigned i; - - for (i = 0; i < 4; i++) { - /* move to destination: */ - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *instr; - - if (src->Absolute || src->Negate) { - /* can't have abs or neg on a mov instr, so use - * absneg.f instead to handle these cases: - */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - } else { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - } - - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src_swiz(src, i)); - } else { - add_nop(ctx, 1); - } - } -} - -static void -create_clamp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *val, - struct tgsi_src_register *minval, struct tgsi_src_register *maxval) -{ - struct ir3_instruction *instr; - - instr = instr_create(ctx, 2, OPC_MAX_F); - vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - - instr = instr_create(ctx, 2, OPC_MIN_F); - vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); -} - -static void -create_clamp_imm(struct fd3_compile_context *ctx, - struct tgsi_dst_register *dst, - uint32_t minval, uint32_t maxval) -{ - struct tgsi_src_register minconst, maxconst; - struct tgsi_src_register src; - - src_from_dst(&src, dst); - - get_immediate(ctx, &minconst, minval); - get_immediate(ctx, &maxconst, maxval); - - create_clamp(ctx, dst, &src, &minconst, &maxconst); -} - -static struct tgsi_dst_register * -get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - unsigned i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_src_register *src = &inst->Src[i].Register; - if ((src->File == dst->File) && (src->Index == dst->Index)) { - if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) && - (src->SwizzleX == TGSI_SWIZZLE_X) && - (src->SwizzleY == TGSI_SWIZZLE_Y) && - (src->SwizzleZ == TGSI_SWIZZLE_Z) && - (src->SwizzleW == TGSI_SWIZZLE_W)) - continue; - ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); - ctx->tmp_dst.WriteMask = dst->WriteMask; - dst = &ctx->tmp_dst; - break; - } - } - return dst; -} - -static void -put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, - struct tgsi_dst_register *dst) -{ - /* if necessary, add mov back into original dst: */ - if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); - } -} - -/* helper to generate the necessary repeat and/or additional instructions - * to turn a scalar instruction into a vector operation: - */ -static void -vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - struct tgsi_dst_register *dst, int nsrcs, ...) -{ - va_list ap; - int i, j, n = 0; - bool indirect = dst->Indirect; - - add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X); - - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - struct ir3_register *reg; - if (flags & IR3_REG_IMMED) { - reg = ir3_reg_create(instr, 0, IR3_REG_IMMED); - /* this is an ugly cast.. should have put flags first! */ - reg->iim_val = *(int *)&src; - } else { - reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X); - indirect |= src->Indirect; - } - reg->flags |= flags & ~IR3_REG_NEGATE; - if (flags & IR3_REG_NEGATE) - reg->flags ^= IR3_REG_NEGATE; - } - va_end(ap); - - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *cur; - - if (n++ == 0) { - cur = instr; - } else { - cur = ir3_instr_clone(instr); - cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP); - } - - /* fix-up dst register component: */ - cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i); - - /* fix-up src register component: */ - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - if (!(flags & IR3_REG_IMMED)) { - cur->regs[j+1]->num = - regid(cur->regs[j+1]->num >> 2, - src_swiz(src, i)); - cur->flags |= src_flags(ctx, cur->regs[j+1]); - } - } - va_end(ap); - - if (indirect) - ctx->last_rel = cur; - } - } - - /* pad w/ nop's.. at least until we are clever enough to - * figure out if we really need to.. - */ - add_nop(ctx, 4 - n); -} - -/* - * Handlers for TGSI instructions which do not have a 1:1 mapping to - * native instructions: - */ - -static void -trans_clamp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct tgsi_src_register *src2 = &inst->Src[2].Register; - - create_clamp(ctx, dst, src0, src1, src2); - - put_dst(ctx, inst, dst); -} - -/* ARL(x) = x, but mova from hrN.x to a0.. */ -static void -trans_arl(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - unsigned chan = src->SwizzleX; - compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS); - - handle_last_rel(ctx); - - tmp_src = get_internal_temp_hr(ctx, &tmp_dst); - - /* cov.{f32,f16}s16 Rtmp, Rsrc */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, src, chan); - - add_nop(ctx, 3); - - /* shl.b Rtmp, Rtmp, 2 */ - instr = instr_create(ctx, 2, OPC_SHL_B); - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - - add_nop(ctx, 3); - - /* mova a0, Rtmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_S16; - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - - /* need to ensure 5 instr slots before a0 is used: */ - add_nop(ctx, 6); -} - -/* texture fetch/sample instructions: */ -static void -trans_samp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_register *r; - struct ir3_instruction *instr; - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct tgsi_src_register *samp = &inst->Src[1].Register; - unsigned tex = inst->Texture.Texture; - int8_t *order; - unsigned i, flags = 0, src_wrmask; - bool needs_mov = false; - - switch (t->arg) { - case TGSI_OPCODE_TEX: - if (tex == TGSI_TEXTURE_2D) { - order = (int8_t[4]){ 0, 1, -1, -1 }; - src_wrmask = TGSI_WRITEMASK_XY; - } else { - order = (int8_t[4]){ 0, 1, 2, -1 }; - src_wrmask = TGSI_WRITEMASK_XYZ; - } - break; - case TGSI_OPCODE_TXP: - if (tex == TGSI_TEXTURE_2D) { - order = (int8_t[4]){ 0, 1, 3, -1 }; - src_wrmask = TGSI_WRITEMASK_XYZ; - } else { - order = (int8_t[4]){ 0, 1, 2, 3 }; - src_wrmask = TGSI_WRITEMASK_XYZW; - } - flags |= IR3_INSTR_P; - break; - default: - compile_assert(ctx, 0); - break; - } - - if ((tex == TGSI_TEXTURE_3D) || (tex == TGSI_TEXTURE_CUBE)) { - add_nop(ctx, 3); - flags |= IR3_INSTR_3D; - } - - /* cat5 instruction cannot seem to handle const or relative: */ - if (is_rel_or_const(coord)) - needs_mov = true; - - /* The texture sample instructions need to coord in successive - * registers/components (ie. src.xy but not src.yx). And TXP - * needs the .w component in .z for 2D.. so in some cases we - * might need to emit some mov instructions to shuffle things - * around: - */ - for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++) - if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) - needs_mov = true; - - if (needs_mov) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - unsigned j; - - type_t type_mov = get_ftype(ctx); - - /* need to move things around: */ - tmp_src = get_internal_temp(ctx, &tmp_dst); - - for (j = 0; (j < 4) && (order[j] >= 0); j++) { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, j); - add_src_reg(ctx, instr, coord, - src_swiz(coord, order[j])); - } - - coord = tmp_src; - - add_nop(ctx, 4 - j); - } - - instr = instr_create(ctx, 5, t->opc); - instr->cat5.type = get_ftype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= flags; - - r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0); - r->wrmask = inst->Dst[0].Register.WriteMask; - - add_src_reg(ctx, instr, coord, coord->SwizzleX)->wrmask = src_wrmask; - - /* after add_src_reg() so we don't set (sy) on sam instr itself! */ - regmask_set(&ctx->needs_sy, r); -} - -/* - * SEQ(a,b) = (a == b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, b, a - * cov.u16f16 dst, tmp0 - * - * SNE(a,b) = (a != b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, b, a - * add.s tmp0, tmp0, -1 - * sel.f16 dst, {0.0}, tmp0, {1.0} - * - * SGE(a,b) = (a >= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLE(a,b) = (a <= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, b, a - * cov.u16f16 dst, tmp0 - * - * SGT(a,b) = (a > b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, b, a - * add.s tmp0, tmp0, -1 - * sel.f16 dst, {0.0}, tmp0, {1.0} - * - * SLT(a,b) = (a < b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * add.s tmp0, tmp0, -1 - * sel.f16 dst, {0.0}, tmp0, {1.0} - * - * CMP(a,b,c) = (a < 0.0) ? b : c - * cmps.f.ge tmp0, a, {0.0} - * add.s tmp0, tmp0, -1 - * sel.f16 dst, c, tmp0, b - */ -static void -trans_cmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval0, constval1; - /* final instruction for CMP() uses orig src1 and src2: */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a0, *a1; - unsigned condition; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - a0 = &inst->Src[1].Register; /* b */ - a1 = &inst->Src[0].Register; /* a */ - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLT: - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SGT: - a0 = &inst->Src[1].Register; /* b */ - a1 = &inst->Src[0].Register; /* a */ - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_CMP: - get_immediate(ctx, &constval0, fui(0.0)); - a0 = &inst->Src[0].Register; /* a */ - a1 = &constval0; /* {0.0} */ - condition = IR3_COND_GE; - break; - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - /* cmps.f.ge tmp, a0, a1 */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLE: - /* cov.u16f16 dst, tmp0 */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, dst, 1, tmp_src, 0); - break; - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_CMP: - /* add.s tmp, tmp, -1 */ - instr = instr_create(ctx, 2, OPC_ADD_S); - vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED); - - if (t->tgsi_opc == TGSI_OPCODE_CMP) { - /* sel.{f32,f16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, - ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32); - vectorize(ctx, instr, dst, 3, - &inst->Src[2].Register, 0, - tmp_src, 0, - &inst->Src[1].Register, 0); - } else { - get_immediate(ctx, &constval0, fui(0.0)); - get_immediate(ctx, &constval1, fui(1.0)); - /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ - instr = instr_create(ctx, 3, - ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32); - vectorize(ctx, instr, dst, 3, - &constval0, 0, tmp_src, 0, &constval1, 0); - } - - break; - } - - put_dst(ctx, inst, dst); -} - -/* - * Conditional / Flow control - */ - -static unsigned -find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr) -{ - unsigned i; - for (i = 0; i < ctx->ir->instrs_count; i++) - if (ctx->ir->instrs[i] == instr) - return i; - return ~0; -} - -static void -push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr) -{ - ctx->branch[ctx->branch_count++] = instr; -} - -static void -pop_branch(struct fd3_compile_context *ctx) -{ - struct ir3_instruction *instr; - - /* if we were clever enough, we'd patch this up after the fact, - * and set (jp) flag on whatever the next instruction was, rather - * than inserting an extra nop.. - */ - instr = instr_create(ctx, 0, OPC_NOP); - instr->flags |= IR3_INSTR_JP; - - /* pop the branch instruction from the stack and fix up branch target: */ - instr = ctx->branch[--ctx->branch_count]; - instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1; -} - -/* We probably don't really want to translate if/else/endif into branches.. - * the blob driver evaluates both legs of the if and then uses the sel - * instruction to pick which sides of the branch to "keep".. but figuring - * that out will take somewhat more compiler smarts. So hopefully branches - * don't kill performance too badly. - */ -static void -trans_if(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_src_register *src = &inst->Src[0].Register; - struct tgsi_src_register constval; - - get_immediate(ctx, &constval, fui(0.0)); - - if (is_const(src)) - src = get_unconst(ctx, src); - - instr = instr_create(ctx, 2, OPC_CMPS_F); - ir3_reg_create(instr, regid(REG_P0, 0), 0); - add_src_reg(ctx, instr, src, src->SwizzleX); - add_src_reg(ctx, instr, &constval, constval.SwizzleX); - instr->cat2.condition = IR3_COND_EQ; - - instr = instr_create(ctx, 0, OPC_BR); - push_branch(ctx, instr); -} - -static void -trans_else(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - - /* for first half of if/else/endif, generate a jump past the else: */ - instr = instr_create(ctx, 0, OPC_JUMP); - - pop_branch(ctx); - push_branch(ctx, instr); -} - -static void -trans_endif(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - pop_branch(ctx); -} - -/* - * Handlers for TGSI instructions which do have 1:1 mapping to native - * instructions: - */ - -static void -instr_cat0(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - instr_create(ctx, 0, t->opc); -} - -static void -instr_cat1(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - - /* mov instructions can't handle a negate on src: */ - if (src->Negate) { - struct tgsi_src_register constval; - struct ir3_instruction *instr; - - /* since right now, we are using uniformly either TYPE_F16 or - * TYPE_F32, and we don't utilize the conversion possibilities - * of mov instructions, we can get away with substituting an - * add.f which can handle negate. Might need to revisit this - * in the future if we start supporting widening/narrowing or - * conversion to/from integer.. - */ - instr = instr_create(ctx, 2, OPC_ADD_F); - get_immediate(ctx, &constval, fui(0.0)); - vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); - } else { - create_mov(ctx, dst, src); - /* create_mov() generates vector sequence, so no vectorize() */ - } - put_dst(ctx, inst, dst); -} - -static void -instr_cat2(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - unsigned src0_flags = 0, src1_flags = 0; - - switch (t->tgsi_opc) { - case TGSI_OPCODE_ABS: - src0_flags = IR3_REG_ABS; - break; - case TGSI_OPCODE_SUB: - src1_flags = IR3_REG_NEGATE; - break; - } - - switch (t->opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 1, src0, src0_flags); - break; - default: - if (is_const(src0) && is_const(src1)) - src0 = get_unconst(ctx, src0); - - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 2, src0, src0_flags, - src1, src1_flags); - break; - } - - put_dst(ctx, inst, dst); -} - -static void -instr_cat3(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - - /* in particular, can't handle const for src1 for cat3.. - * for mad, we can swap first two src's if needed: - */ - if (is_rel_or_const(src1)) { - if (is_mad(t->opc) && !is_rel_or_const(src0)) { - struct tgsi_src_register *tmp; - tmp = src0; - src0 = src1; - src1 = tmp; - } else { - src1 = get_unconst(ctx, src1); - } - } - - instr = instr_create(ctx, 3, - ctx->so->key.half_precision ? t->hopc : t->opc); - vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, - &inst->Src[2].Register, 0); - put_dst(ctx, inst, dst); -} - -static void -instr_cat4(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned i, n; - - /* seems like blob compiler avoids const as src.. */ - if (is_const(src)) - src = get_unconst(ctx, src); - - /* worst case: */ - add_nop(ctx, 6); - - /* we need to replicate into each component: */ - for (i = 0, n = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - if (n++) - add_nop(ctx, 1); - instr = instr_create(ctx, 4, t->opc); - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src->SwizzleX); - } - } - - regmask_set(&ctx->needs_ss, instr->regs[0]); - put_dst(ctx, inst, dst); -} - -static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { -#define INSTR(n, f, ...) \ - [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } - - INSTR(MOV, instr_cat1), - INSTR(RCP, instr_cat4, .opc = OPC_RCP), - INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), - INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), - INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), - INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), - INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), - INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), - INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), - INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), - INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), - INSTR(CLAMP, trans_clamp), - INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F), - INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F), - INSTR(ARL, trans_arl), - INSTR(EX2, instr_cat4, .opc = OPC_EXP2), - INSTR(LG2, instr_cat4, .opc = OPC_LOG2), - INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), - INSTR(COS, instr_cat4, .opc = OPC_COS), - INSTR(SIN, instr_cat4, .opc = OPC_SIN), - INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), - INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), - INSTR(SGT, trans_cmp), - INSTR(SLT, trans_cmp), - INSTR(SGE, trans_cmp), - INSTR(SLE, trans_cmp), - INSTR(SNE, trans_cmp), - INSTR(SEQ, trans_cmp), - INSTR(CMP, trans_cmp), - INSTR(IF, trans_if), - INSTR(ELSE, trans_else), - INSTR(ENDIF, trans_endif), - INSTR(END, instr_cat0, .opc = OPC_END), - INSTR(KILL, instr_cat0, .opc = OPC_KILL), -}; - -static fd3_semantic -decl_semantic(const struct tgsi_declaration_semantic *sem) -{ - return fd3_semantic_name(sem->Name, sem->Index); -} - -static int -decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct fd3_shader_variant *so = ctx->so; - unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; - unsigned i, flags = 0; - int nop = 0; - - /* I don't think we should get frag shader input without - * semantic info? Otherwise how do inputs get linked to - * vert outputs? - */ - compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || - decl->Declaration.Semantic); - - if (ctx->so->key.half_precision) - flags |= IR3_REG_HALF; - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->inputs_count++; - unsigned r = regid(i + base, 0); - unsigned ncomp; - - /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */ - ncomp = 4; - - DBG("decl in -> r%d", i + base); // XXX - - compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); - - so->inputs[n].semantic = decl_semantic(&decl->Semantic); - so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs[n].ncomp = ncomp; - so->inputs[n].regid = r; - so->inputs[n].inloc = ctx->next_inloc; - so->inputs[n].bary = true; /* all that is supported */ - ctx->next_inloc += ncomp; - - so->total_in += ncomp; - - /* for frag shaders, we need to generate the corresponding bary instr: */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - unsigned j; - - for (j = 0; j < ncomp; j++) { - struct ir3_instruction *instr; - struct ir3_register *dst; - - instr = instr_create(ctx, 2, OPC_BARY_F); - - /* dst register: */ - dst = ir3_reg_create(instr, r + j, flags); - ctx->last_input = dst; - - /* input position: */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = - so->inputs[n].inloc + j - 8; - - /* input base (always r0.xy): */ - ir3_reg_create(instr, regid(0,0), 0)->wrmask = 0x3; - } - - nop = 6; - } - } - - return nop; -} - -static void -decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct fd3_shader_variant *so = ctx->so; - unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; - unsigned comp = 0; - unsigned name = decl->Semantic.Name; - unsigned i; - - compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true? - - DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX - - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - so->writes_pos = true; - break; - case TGSI_SEMANTIC_PSIZE: - so->writes_psize = true; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_TEXCOORD: - break; - default: - compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } else { - switch (name) { - case TGSI_SEMANTIC_POSITION: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; - case TGSI_SEMANTIC_COLOR: - break; - default: - compile_error(ctx, "unknown FS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->outputs_count++; - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - so->outputs[n].semantic = decl_semantic(&decl->Semantic); - so->outputs[n].regid = regid(i + base, comp); - } -} - -static void -decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - ctx->so->has_samp = true; -} - -static void -compile_instructions(struct fd3_compile_context *ctx) -{ - struct ir3 *ir = ctx->ir; - int nop = 0; - - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - if (decl->Declaration.File == TGSI_FILE_OUTPUT) { - decl_out(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - nop = decl_in(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - decl_samp(ctx, decl); - } - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - /* TODO: if we know the immediate is small enough, and only - * used with instructions that can embed an immediate, we - * can skip this: - */ - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->immediates_count++; - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *inst = - &ctx->parser.FullToken.FullInstruction; - unsigned opc = inst->Instruction.Opcode; - const struct instr_translater *t = &translaters[opc]; - - add_nop(ctx, nop); - nop = 0; - - if (t->fxn) { - t->fxn(t, ctx, inst); - ctx->num_internal_temps = 0; - } else { - compile_error(ctx, "unknown TGSI opc: %s\n", - tgsi_get_opcode_name(opc)); - } - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_ZERO_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(0.0), fui(1.0)); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(-1.0), fui(1.0)); - break; - } - - break; - } - default: - break; - } - } - - if (ir->instrs_count > 0) - ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; - - if (ctx->last_input) - ctx->last_input->flags |= IR3_REG_EI; - - handle_last_rel(ctx); -} - -int -fd3_compile_shader_old(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, struct fd3_shader_key key) -{ - struct fd3_compile_context ctx; - - assert(!so->ir); - - so->ir = ir3_create(); - - assert(so->ir); - - if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) - return -1; - - compile_instructions(&ctx); - - compile_free(&ctx); - - return 0; -} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 4b2d94103f5..89af740c07c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -44,7 +44,7 @@ static void emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd3_shader_key key) + struct ir3_shader_key key) { struct fd_vertex_stateobj *vtx = ctx->vtx; struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf; @@ -70,7 +70,7 @@ emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring, static void draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, - struct fd_ringbuffer *ring, unsigned dirty, struct fd3_shader_key key) + struct fd_ringbuffer *ring, unsigned dirty, struct ir3_shader_key key) { fd3_emit_state(ctx, ring, &ctx->prog, dirty, key); @@ -99,7 +99,7 @@ static void fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) { unsigned dirty = ctx->dirty; - struct fd3_shader_key key = { + struct ir3_shader_key key = { /* do binning pass first: */ .binning_pass = true, .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, @@ -127,7 +127,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty) { struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->binning_ring; - struct fd3_shader_key key = { + struct ir3_shader_key key = { .binning_pass = true, .half_precision = true, }; @@ -168,7 +168,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, struct fd_ringbuffer *ring = ctx->ring; unsigned dirty = ctx->dirty; unsigned ce, i; - struct fd3_shader_key key = { + struct ir3_shader_key key = { .half_precision = true, }; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 1e4de26406a..44932dc241d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -87,7 +87,7 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct fd3_shader_variant *shader) + struct ir3_shader_variant *shader) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t first_immediate; @@ -291,7 +291,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - struct fd3_shader_variant *vp, + struct ir3_shader_variant *vp, struct fd3_vertex_buf *vbufs, uint32_t n) { uint32_t i, j, last = 0; @@ -350,10 +350,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog, uint32_t dirty, - struct fd3_shader_key key) + struct ir3_shader_key key) { - struct fd3_shader_variant *vp; - struct fd3_shader_variant *fp; + struct ir3_shader_variant *vp; + struct ir3_shader_variant *fp; fp = fd3_shader_variant(prog->fp, key); vp = fd3_shader_variant(prog->vp, key); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index f2ae4dc295e..5735c9f873d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -33,7 +33,7 @@ #include "freedreno_context.h" #include "fd3_util.h" - +#include "ir3_shader.h" struct fd_ringbuffer; enum adreno_state_block; @@ -56,11 +56,11 @@ struct fd3_vertex_buf { }; void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - struct fd3_shader_variant *vp, + struct ir3_shader_variant *vp, struct fd3_vertex_buf *vbufs, uint32_t n); void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog, uint32_t dirty, - struct fd3_shader_key key); + struct ir3_shader_key key); void fd3_emit_restore(struct fd_context *ctx); #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 8519a90ccfa..6828d0e1fb4 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -43,7 +43,7 @@ #include "fd3_util.h" #include "fd3_zsa.h" -static const struct fd3_shader_key key = { +static const struct ir3_shader_key key = { // XXX should set this based on render target format! We don't // want half_precision if float32 render target!!! .half_precision = true, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 164b1521a89..78c71d42e39 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -38,176 +38,23 @@ #include "freedreno_program.h" #include "fd3_program.h" -#include "fd3_compiler.h" #include "fd3_emit.h" #include "fd3_texture.h" #include "fd3_util.h" static void -delete_variant(struct fd3_shader_variant *v) +delete_shader_stateobj(struct fd3_shader_stateobj *so) { - ir3_destroy(v->ir); - fd_bo_del(v->bo); - free(v); -} - -static void -assemble_variant(struct fd3_shader_variant *so) -{ - struct fd_context *ctx = fd_context(so->so->pctx); - uint32_t sz, *bin; - - bin = ir3_assemble(so->ir, &so->info); - sz = so->info.sizedwords * 4; - - so->bo = fd_bo_new(ctx->dev, sz, - DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM); - - memcpy(fd_bo_map(so->bo), bin, sz); - - free(bin); - - so->instrlen = so->info.sizedwords / 8; - so->constlen = so->info.max_const + 1; -} - -/* for vertex shader, the inputs are loaded into registers before the shader - * is executed, so max_regs from the shader instructions might not properly - * reflect the # of registers actually used: - */ -static void -fixup_vp_regfootprint(struct fd3_shader_variant *so) -{ - unsigned i; - for (i = 0; i < so->inputs_count; i++) { - if (so->inputs[i].compmask) { - uint32_t regid = (so->inputs[i].regid + 3) >> 2; - so->info.max_reg = MAX2(so->info.max_reg, regid); - } - } - for (i = 0; i < so->outputs_count; i++) { - uint32_t regid = (so->outputs[i].regid + 3) >> 2; - so->info.max_reg = MAX2(so->info.max_reg, regid); - } -} - -static struct fd3_shader_variant * -create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) -{ - struct fd3_shader_variant *v = CALLOC_STRUCT(fd3_shader_variant); - const struct tgsi_token *tokens = so->tokens; - int ret; - - if (!v) - return NULL; - - v->so = so; - v->key = key; - v->type = so->type; - - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", so->type, - key.binning_pass, key.color_two_side, key.half_precision); - tgsi_dump(tokens, 0); - } - - if (!(fd_mesa_debug & FD_DBG_NOOPT)) { - ret = fd3_compile_shader(v, tokens, key); - if (ret) { - debug_error("new compiler failed, trying fallback!"); - - v->inputs_count = 0; - v->outputs_count = 0; - v->total_in = 0; - v->has_samp = false; - v->immediates_count = 0; - } - } else { - ret = -1; /* force fallback to old compiler */ - } - - if (ret) - ret = fd3_compile_shader_old(v, tokens, key); - - if (ret) { - debug_error("compile failed!"); - goto fail; - } - - assemble_variant(v); - if (!v->bo) { - debug_error("assemble failed!"); - goto fail; - } - - if (so->type == SHADER_VERTEX) - fixup_vp_regfootprint(v); - - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, - key.binning_pass, key.color_two_side, key.half_precision); - disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type); - } - - return v; - -fail: - delete_variant(v); - return NULL; -} - -struct fd3_shader_variant * -fd3_shader_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) -{ - struct fd3_shader_variant *v; - - /* some shader key values only apply to vertex or frag shader, - * so normalize the key to avoid constructing multiple identical - * variants: - */ - if (so->type == SHADER_FRAGMENT) { - key.binning_pass = false; - } - if (so->type == SHADER_VERTEX) { - key.color_two_side = false; - key.half_precision = false; - } - - for (v = so->variants; v; v = v->next) - if (!memcmp(&key, &v->key, sizeof(key))) - return v; - - /* compile new variant if it doesn't exist already: */ - v = create_variant(so, key); - v->next = so->variants; - so->variants = v; - - return v; -} - - -static void -delete_shader(struct fd3_shader_stateobj *so) -{ - struct fd3_shader_variant *v, *t; - for (v = so->variants; v; ) { - t = v; - v = v->next; - delete_variant(t); - } - free((void *)so->tokens); + ir3_shader_destroy(so->shader); free(so); } static struct fd3_shader_stateobj * -create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso, +create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, enum shader_t type) { struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); - so->pctx = pctx; - so->type = type; - so->tokens = tgsi_dup_tokens(cso->tokens); + so->shader = ir3_shader_create(pctx, cso->tokens, type); return so; } @@ -215,32 +62,32 @@ static void * fd3_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); } static void fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd3_shader_stateobj *so = hwcso; - delete_shader(so); + delete_shader_stateobj(so); } static void * fd3_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, SHADER_VERTEX); } static void fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd3_shader_stateobj *so = hwcso; - delete_shader(so); + delete_shader_stateobj(so); } static void -emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so) +emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; enum adreno_state_block sb; @@ -281,7 +128,7 @@ emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so) } static int -find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) +find_output(const struct ir3_shader_variant *so, ir3_semantic semantic) { int j; @@ -297,7 +144,7 @@ find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) */ if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) { unsigned idx = sem2idx(semantic); - return find_output(so, fd3_semantic_name(TGSI_SEMANTIC_COLOR, idx)); + return find_output(so, ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx)); } debug_assert(0); @@ -306,7 +153,7 @@ find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) } static int -next_varying(const struct fd3_shader_variant *so, int i) +next_varying(const struct ir3_shader_variant *so, int i) { while (++i < so->inputs_count) if (so->inputs[i].compmask && so->inputs[i].bary) @@ -315,7 +162,7 @@ next_varying(const struct fd3_shader_variant *so, int i) } static uint32_t -find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic) +find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -326,9 +173,9 @@ find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic) void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, struct fd3_shader_key key) + struct fd_program_stateobj *prog, struct ir3_shader_key key) { - const struct fd3_shader_variant *vp, *fp; + const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; uint32_t pos_regid, posz_regid, psize_regid, color_regid; int i, j, k; @@ -337,7 +184,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, if (key.binning_pass) { /* use dummy stateobj to simplify binning vs non-binning: */ - static const struct fd3_shader_variant binning_fp = {}; + static const struct ir3_shader_variant binning_fp = {}; fp = &binning_fp; } else { fp = fd3_shader_variant(prog->fp, key); @@ -347,13 +194,13 @@ fd3_program_emit(struct fd_ringbuffer *ring, fsi = &fp->info; pos_regid = find_output_regid(vp, - fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); + ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); posz_regid = find_output_regid(fp, - fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); + ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); psize_regid = find_output_regid(vp, - fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); + ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); color_regid = find_output_regid(fp, - fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -522,16 +369,16 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); - OUT_RING(ring, fp->so->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ - OUT_RING(ring, fp->so->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ - OUT_RING(ring, fp->so->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ - OUT_RING(ring, fp->so->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ + OUT_RING(ring, fp->shader->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ + OUT_RING(ring, fp->shader->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ + OUT_RING(ring, fp->shader->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ + OUT_RING(ring, fp->shader->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); - OUT_RING(ring, fp->so->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ - OUT_RING(ring, fp->so->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ - OUT_RING(ring, fp->so->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ - OUT_RING(ring, fp->so->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ } OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); @@ -558,10 +405,10 @@ fix_blit_fp(struct pipe_context *pctx) struct fd_context *ctx = fd_context(pctx); struct fd3_shader_stateobj *so = ctx->blit_prog.fp; - so->vpsrepl[0] = 0x99999999; - so->vpsrepl[1] = 0x99999999; - so->vpsrepl[2] = 0x99999999; - so->vpsrepl[3] = 0x99999999; + so->shader->vpsrepl[0] = 0x99999999; + so->shader->vpsrepl[1] = 0x99999999; + so->shader->vpsrepl[2] = 0x99999999; + so->shader->vpsrepl[3] = 0x99999999; } void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index e2ed1cc3dda..cebaeecc5bc 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -30,127 +30,22 @@ #define FD3_PROGRAM_H_ #include "pipe/p_context.h" - #include "freedreno_context.h" -#include "fd3_util.h" -#include "ir3.h" -#include "disasm.h" - -typedef uint16_t fd3_semantic; /* semantic name + index */ -static inline fd3_semantic -fd3_semantic_name(uint8_t name, uint16_t index) -{ - return (name << 8) | (index & 0xff); -} - -static inline uint8_t sem2name(fd3_semantic sem) -{ - return sem >> 8; -} - -static inline uint16_t sem2idx(fd3_semantic sem) -{ - return sem & 0xff; -} - -struct fd3_shader_variant { - struct fd_bo *bo; - - struct fd3_shader_key key; - - struct ir3_info info; - struct ir3 *ir; - - /* the instructions length is in units of instruction groups - * (4 instructions, 8 dwords): - */ - unsigned instrlen; - - /* the constants length is in units of vec4's, and is the sum of - * the uniforms and the built-in compiler constants - */ - unsigned constlen; - - /* About Linkage: - * + Let the frag shader determine the position/compmask for the - * varyings, since it is the place where we know if the varying - * is actually used, and if so, which components are used. So - * what the hw calls "outloc" is taken from the "inloc" of the - * frag shader. - * + From the vert shader, we only need the output regid - */ - - /* for frag shader, pos_regid holds the frag_pos, ie. what is passed - * to bary.f instructions - */ - uint8_t pos_regid; - bool frag_coord, frag_face; - - /* varyings/outputs: */ - unsigned outputs_count; - struct { - fd3_semantic semantic; - uint8_t regid; - } outputs[16 + 2]; /* +POSITION +PSIZE */ - bool writes_pos, writes_psize; - - /* vertices/inputs: */ - unsigned inputs_count; - struct { - fd3_semantic semantic; - uint8_t regid; - uint8_t compmask; - uint8_t ncomp; - /* in theory inloc of fs should match outloc of vs: */ - uint8_t inloc; - uint8_t bary; - } inputs[16 + 2]; /* +POSITION +FACE */ - - unsigned total_in; /* sum of inputs (scalar) */ - - /* do we have one or more texture sample instructions: */ - bool has_samp; - - /* const reg # of first immediate, ie. 1 == c1 - * (not regid, because TGSI thinks in terms of vec4 registers, - * not scalar registers) - */ - unsigned first_immediate; - unsigned immediates_count; - struct { - uint32_t val[4]; - } immediates[64]; - - /* shader varients form a linked list: */ - struct fd3_shader_variant *next; - - /* replicated here to avoid passing extra ptrs everywhere: */ - enum shader_t type; - struct fd3_shader_stateobj *so; -}; +#include "ir3_shader.h" struct fd3_shader_stateobj { - enum shader_t type; - - struct pipe_context *pctx; - const struct tgsi_token *tokens; - - struct fd3_shader_variant *variants; - - /* so far, only used for blit_prog shader.. values for - * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE - * - * Possibly should be in fd3_program_variant? - */ - uint32_t vinterp[4], vpsrepl[4]; + struct ir3_shader *shader; }; -struct fd3_shader_variant * fd3_shader_variant(struct fd3_shader_stateobj *so, - struct fd3_shader_key key); - void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, struct fd3_shader_key key); + struct fd_program_stateobj *prog, struct ir3_shader_key key); void fd3_prog_init(struct pipe_context *pctx); +static inline struct ir3_shader_variant * +fd3_shader_variant(struct fd3_shader_stateobj *so, struct ir3_shader_key key) +{ + return ir3_shader_variant(so->shader, key); +} + #endif /* FD3_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h index 6462d18f913..4681840b173 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_util.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.h @@ -43,22 +43,4 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); -/* Configuration key used to identify a shader variant.. different - * shader variants can be used to implement features not supported - * in hw (two sided color), binning-pass vertex shader, etc. - * - * NOTE: this is declared here (rather than fd3_program.h) as it is - * passed around through a lot of the emit code in various parts - * which would otherwise not necessarily need to incl fd3_program.h - */ -struct fd3_shader_key { - /* vertex shader variant parameters: */ - unsigned binning_pass : 1; - - /* fragment shader variant parameters: */ - unsigned color_two_side : 1; - unsigned half_precision : 1; -}; -struct fd3_shader_variant; - #endif /* FD3_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h deleted file mode 100644 index c67f1037ced..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h +++ /dev/null @@ -1,691 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef INSTR_A3XX_H_ -#define INSTR_A3XX_H_ - -#define PACKED __attribute__((__packed__)) - -#include <stdint.h> -#include <assert.h> - -typedef enum { - /* category 0: */ - OPC_NOP = 0, - OPC_BR = 1, - OPC_JUMP = 2, - OPC_CALL = 3, - OPC_RET = 4, - OPC_KILL = 5, - OPC_END = 6, - OPC_EMIT = 7, - OPC_CUT = 8, - OPC_CHMASK = 9, - OPC_CHSH = 10, - OPC_FLOW_REV = 11, - - /* category 1: */ - /* no opc.. all category 1 are variants of mov */ - - /* category 2: */ - OPC_ADD_F = 0, - OPC_MIN_F = 1, - OPC_MAX_F = 2, - OPC_MUL_F = 3, - OPC_SIGN_F = 4, - OPC_CMPS_F = 5, - OPC_ABSNEG_F = 6, - OPC_CMPV_F = 7, - /* 8 - invalid */ - OPC_FLOOR_F = 9, - OPC_CEIL_F = 10, - OPC_RNDNE_F = 11, - OPC_RNDAZ_F = 12, - OPC_TRUNC_F = 13, - /* 14-15 - invalid */ - OPC_ADD_U = 16, - OPC_ADD_S = 17, - OPC_SUB_U = 18, - OPC_SUB_S = 19, - OPC_CMPS_U = 20, - OPC_CMPS_S = 21, - OPC_MIN_U = 22, - OPC_MIN_S = 23, - OPC_MAX_U = 24, - OPC_MAX_S = 25, - OPC_ABSNEG_S = 26, - /* 27 - invalid */ - OPC_AND_B = 28, - OPC_OR_B = 29, - OPC_NOT_B = 30, - OPC_XOR_B = 31, - /* 32 - invalid */ - OPC_CMPV_U = 33, - OPC_CMPV_S = 34, - /* 35-47 - invalid */ - OPC_MUL_U = 48, - OPC_MUL_S = 49, - OPC_MULL_U = 50, - OPC_BFREV_B = 51, - OPC_CLZ_S = 52, - OPC_CLZ_B = 53, - OPC_SHL_B = 54, - OPC_SHR_B = 55, - OPC_ASHR_B = 56, - OPC_BARY_F = 57, - OPC_MGEN_B = 58, - OPC_GETBIT_B = 59, - OPC_SETRM = 60, - OPC_CBITS_B = 61, - OPC_SHB = 62, - OPC_MSAD = 63, - - /* category 3: */ - OPC_MAD_U16 = 0, - OPC_MADSH_U16 = 1, - OPC_MAD_S16 = 2, - OPC_MADSH_M16 = 3, /* should this be .s16? */ - OPC_MAD_U24 = 4, - OPC_MAD_S24 = 5, - OPC_MAD_F16 = 6, - OPC_MAD_F32 = 7, - OPC_SEL_B16 = 8, - OPC_SEL_B32 = 9, - OPC_SEL_S16 = 10, - OPC_SEL_S32 = 11, - OPC_SEL_F16 = 12, - OPC_SEL_F32 = 13, - OPC_SAD_S16 = 14, - OPC_SAD_S32 = 15, - - /* category 4: */ - OPC_RCP = 0, - OPC_RSQ = 1, - OPC_LOG2 = 2, - OPC_EXP2 = 3, - OPC_SIN = 4, - OPC_COS = 5, - OPC_SQRT = 6, - // 7-63 - invalid - - /* category 5: */ - OPC_ISAM = 0, - OPC_ISAML = 1, - OPC_ISAMM = 2, - OPC_SAM = 3, - OPC_SAMB = 4, - OPC_SAML = 5, - OPC_SAMGQ = 6, - OPC_GETLOD = 7, - OPC_CONV = 8, - OPC_CONVM = 9, - OPC_GETSIZE = 10, - OPC_GETBUF = 11, - OPC_GETPOS = 12, - OPC_GETINFO = 13, - OPC_DSX = 14, - OPC_DSY = 15, - OPC_GATHER4R = 16, - OPC_GATHER4G = 17, - OPC_GATHER4B = 18, - OPC_GATHER4A = 19, - OPC_SAMGP0 = 20, - OPC_SAMGP1 = 21, - OPC_SAMGP2 = 22, - OPC_SAMGP3 = 23, - OPC_DSXPP_1 = 24, - OPC_DSYPP_1 = 25, - OPC_RGETPOS = 26, - OPC_RGETINFO = 27, - - /* category 6: */ - OPC_LDG = 0, /* load-global */ - OPC_LDL = 1, - OPC_LDP = 2, - OPC_STG = 3, /* store-global */ - OPC_STL = 4, - OPC_STP = 5, - OPC_STI = 6, - OPC_G2L = 7, - OPC_L2G = 8, - OPC_PREFETCH = 9, - OPC_LDLW = 10, - OPC_STLW = 11, - OPC_RESFMT = 14, - OPC_RESINFO = 15, - OPC_ATOMIC_ADD_L = 16, - OPC_ATOMIC_SUB_L = 17, - OPC_ATOMIC_XCHG_L = 18, - OPC_ATOMIC_INC_L = 19, - OPC_ATOMIC_DEC_L = 20, - OPC_ATOMIC_CMPXCHG_L = 21, - OPC_ATOMIC_MIN_L = 22, - OPC_ATOMIC_MAX_L = 23, - OPC_ATOMIC_AND_L = 24, - OPC_ATOMIC_OR_L = 25, - OPC_ATOMIC_XOR_L = 26, - OPC_LDGB_TYPED_4D = 27, - OPC_STGB_4D_4 = 28, - OPC_STIB = 29, - OPC_LDC_4 = 30, - OPC_LDLV = 31, - - /* meta instructions (category -1): */ - /* placeholder instr to mark inputs/outputs: */ - OPC_META_INPUT = 0, - OPC_META_OUTPUT = 1, - /* The "fan-in" and "fan-out" instructions are used for keeping - * track of instructions that write to multiple dst registers - * (fan-out) like texture sample instructions, or read multiple - * consecutive scalar registers (fan-in) (bary.f, texture samp) - */ - OPC_META_FO = 2, - OPC_META_FI = 3, - /* branches/flow control */ - OPC_META_FLOW = 4, - OPC_META_PHI = 5, - /* relative addressing */ - OPC_META_DEREF = 6, - - -} opc_t; - -typedef enum { - TYPE_F16 = 0, - TYPE_F32 = 1, - TYPE_U16 = 2, - TYPE_U32 = 3, - TYPE_S16 = 4, - TYPE_S32 = 5, - TYPE_U8 = 6, - TYPE_S8 = 7, // XXX I assume? -} type_t; - -static inline uint32_t type_size(type_t type) -{ - switch (type) { - case TYPE_F32: - case TYPE_U32: - case TYPE_S32: - return 32; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return 16; - case TYPE_U8: - case TYPE_S8: - return 8; - default: - assert(0); /* invalid type */ - return 0; - } -} - -static inline int type_float(type_t type) -{ - return (type == TYPE_F32) || (type == TYPE_F16); -} - -static inline int type_uint(type_t type) -{ - return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); -} - -static inline int type_sint(type_t type) -{ - return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); -} - -typedef union PACKED { - /* normal gpr or const src register: */ - struct PACKED { - uint32_t comp : 2; - uint32_t num : 10; - }; - /* for immediate val: */ - int32_t iim_val : 11; - /* to make compiler happy: */ - uint32_t dummy32; - uint32_t dummy10 : 10; - uint32_t dummy11 : 11; - uint32_t dummy12 : 12; - uint32_t dummy13 : 13; - uint32_t dummy8 : 8; -} reg_t; - -/* special registers: */ -#define REG_A0 61 /* address register */ -#define REG_P0 62 /* predicate register */ - -static inline int reg_special(reg_t reg) -{ - return (reg.num == REG_A0) || (reg.num == REG_P0); -} - -typedef struct PACKED { - /* dword0: */ - int16_t immed : 16; - uint32_t dummy1 : 16; - - /* dword1: */ - uint32_t dummy2 : 8; - uint32_t repeat : 3; - uint32_t dummy3 : 1; - uint32_t ss : 1; - uint32_t dummy4 : 7; - uint32_t inv : 1; - uint32_t comp : 2; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat0_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* for normal src register: */ - struct PACKED { - uint32_t src : 11; - /* at least low bit of pad must be zero or it will - * look like a address relative src - */ - uint32_t pad : 21; - }; - /* for address relative: */ - struct PACKED { - int32_t off : 10; - uint32_t src_rel_c : 1; - uint32_t src_rel : 1; - uint32_t unknown : 20; - }; - /* for immediate: */ - int32_t iim_val; - float fim_val; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_type : 3; - uint32_t dst_rel : 1; - uint32_t src_type : 3; - uint32_t src_c : 1; - uint32_t src_im : 1; - uint32_t even : 1; - uint32_t pos_inf : 1; - uint32_t must_be_0 : 2; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat1_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src1_im : 1; /* immediate */ - uint32_t src1_neg : 1; /* negate */ - uint32_t src1_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; /* relative-const */ - uint32_t src1_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src2 : 11; - uint32_t must_be_zero2: 2; - uint32_t src2_im : 1; /* immediate */ - uint32_t src2_neg : 1; /* negate */ - uint32_t src2_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src2 : 10; - uint32_t src2_c : 1; /* relative-const */ - uint32_t src2_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src2 : 12; - uint32_t src2_c : 1; /* const */ - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src1_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; /* dunno */ - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t ei : 1; - uint32_t cond : 3; - uint32_t src2_r : 1; - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat2_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src2_c : 1; - uint32_t src1_neg : 1; - uint32_t src2_r : 1; - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; - uint32_t src1_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src3 : 11; - uint32_t must_be_zero2: 2; - uint32_t src3_r : 1; - uint32_t src2_neg : 1; - uint32_t src3_neg : 1; - }; - struct PACKED { - uint32_t src3 : 10; - uint32_t src3_c : 1; - uint32_t src3_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src3 : 12; - uint32_t src3_c : 1; - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src1_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t src2 : 8; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat3_t; - -static inline bool instr_cat3_full(instr_cat3_t *cat3) -{ - switch (cat3->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - return false; - default: - return true; - } -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src : 11; - uint32_t must_be_zero1: 2; - uint32_t src_im : 1; /* immediate */ - uint32_t src_neg : 1; /* negate */ - uint32_t src_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src : 10; - uint32_t src_c : 1; /* relative-const */ - uint32_t src_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel; - struct PACKED { - uint32_t src : 12; - uint32_t src_c : 1; /* const */ - uint32_t dummy : 3; - } c; - }; - uint32_t dummy1 : 16; /* seem to be ignored */ - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t dummy2 : 5; /* seem to be ignored */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat4_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* normal case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 4; /* seem to be ignored */ - uint32_t samp : 4; - uint32_t tex : 7; - } norm; - /* s2en case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 11; - uint32_t dummy1 : 1; - uint32_t src3 : 8; - uint32_t dummy2 : 3; - } s2en; - /* same in either case: */ - // XXX I think, confirm this - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t pad : 23; - }; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t wrmask : 4; /* write-mask */ - uint32_t type : 3; - uint32_t dummy2 : 1; /* seems to be ignored */ - uint32_t is_3d : 1; - - uint32_t is_a : 1; - uint32_t is_s : 1; - uint32_t is_s2en : 1; - uint32_t is_o : 1; - uint32_t is_p : 1; - - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat5_t; - -/* used for load instructions: */ -typedef struct PACKED { - /* dword0: */ - uint32_t must_be_one1 : 1; - int16_t off : 13; - uint32_t src : 8; - uint32_t dummy1 : 1; - uint32_t must_be_one2 : 1; - int32_t iim_val : 8; - - /* dword1: */ - uint32_t dst : 8; - uint32_t dummy2 : 9; - uint32_t type : 3; - uint32_t dummy3 : 2; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat6a_t; - -/* used for store instructions: */ -typedef struct PACKED { - /* dword0: */ - uint32_t must_be_zero1 : 1; - uint32_t src : 8; - uint32_t off_hi : 5; /* high bits of 'off'... ugly! */ - uint32_t dummy1 : 9; - uint32_t must_be_one1 : 1; - int32_t iim_val : 8; - - /* dword1: */ - uint16_t off : 8; - uint32_t must_be_one2 : 1; - uint32_t dst : 8; - uint32_t type : 3; - uint32_t dummy2 : 2; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat6b_t; - -typedef union PACKED { - instr_cat6a_t a; - instr_cat6b_t b; - struct PACKED { - /* dword0: */ - uint32_t pad1 : 24; - int32_t iim_val : 8; - - /* dword1: */ - uint32_t pad2 : 17; - uint32_t type : 3; - uint32_t pad3 : 2; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - }; -} instr_cat6_t; - -typedef union PACKED { - instr_cat0_t cat0; - instr_cat1_t cat1; - instr_cat2_t cat2; - instr_cat3_t cat3; - instr_cat4_t cat4; - instr_cat5_t cat5; - instr_cat6_t cat6; - struct PACKED { - /* dword0: */ - uint64_t pad1 : 40; - uint32_t repeat : 3; /* cat0-cat4 */ - uint32_t pad2 : 1; - uint32_t ss : 1; /* cat1-cat4 (cat0??) */ - uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */ - uint32_t pad3 : 13; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - - }; -} instr_t; - -static inline uint32_t instr_opc(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: return instr->cat6.opc; - default: return 0; - } -} - -static inline bool is_mad(opc_t opc) -{ - switch (opc) { - case OPC_MAD_U16: - case OPC_MADSH_U16: - case OPC_MAD_S16: - case OPC_MADSH_M16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_MAD_F16: - case OPC_MAD_F32: - return true; - default: - return false; - } -} - -#endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.c b/src/gallium/drivers/freedreno/a3xx/ir3.c deleted file mode 100644 index ea2a9251b28..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3.c +++ /dev/null @@ -1,675 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir3.h" - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <assert.h> -#include <stdbool.h> -#include <errno.h> - -#include "freedreno_util.h" -#include "instr-a3xx.h" - -#define CHUNK_SZ 1020 - -struct ir3_heap_chunk { - struct ir3_heap_chunk *next; - uint32_t heap[CHUNK_SZ]; -}; - -static void grow_heap(struct ir3 *shader) -{ - struct ir3_heap_chunk *chunk = calloc(1, sizeof(*chunk)); - chunk->next = shader->chunk; - shader->chunk = chunk; - shader->heap_idx = 0; -} - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -void * ir3_alloc(struct ir3 *shader, int sz) -{ - void *ptr; - - sz = align(sz, 4) / 4; - - if ((shader->heap_idx + sz) > CHUNK_SZ) - grow_heap(shader); - - ptr = &shader->chunk->heap[shader->heap_idx]; - shader->heap_idx += sz; - - return ptr; -} - -struct ir3 * ir3_create(void) -{ - struct ir3 *shader = - calloc(1, sizeof(struct ir3)); - grow_heap(shader); - return shader; -} - -void ir3_destroy(struct ir3 *shader) -{ - while (shader->chunk) { - struct ir3_heap_chunk *chunk = shader->chunk; - shader->chunk = chunk->next; - free(chunk); - } - free(shader); -} - -#define iassert(cond) do { \ - if (!(cond)) { \ - assert(cond); \ - return -1; \ - } } while (0) - -static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, - uint32_t repeat, uint32_t valid_flags) -{ - reg_t val = { .dummy32 = 0 }; - - assert(!(reg->flags & ~valid_flags)); - - if (!(reg->flags & IR3_REG_R)) - repeat = 0; - - if (reg->flags & IR3_REG_IMMED) { - val.iim_val = reg->iim_val; - } else { - int8_t components = util_last_bit(reg->wrmask); - int8_t max = (reg->num + repeat + components - 1) >> 2; - - val.comp = reg->num & 0x3; - val.num = reg->num >> 2; - - if (reg->flags & IR3_REG_CONST) { - info->max_const = MAX2(info->max_const, max); - } else if ((max != REG_A0) && (max != REG_P0)) { - if (reg->flags & IR3_REG_HALF) { - info->max_half_reg = MAX2(info->max_half_reg, max); - } else { - info->max_reg = MAX2(info->max_reg, max); - } - } - } - - return val.dummy32; -} - -static int emit_cat0(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - instr_cat0_t *cat0 = ptr; - - cat0->immed = instr->cat0.immed; - cat0->repeat = instr->repeat; - cat0->ss = !!(instr->flags & IR3_INSTR_SS); - cat0->inv = instr->cat0.inv; - cat0->comp = instr->cat0.comp; - cat0->opc = instr->opc; - cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat0->sync = !!(instr->flags & IR3_INSTR_SY); - cat0->opc_cat = 0; - - return 0; -} - -static uint32_t type_flags(type_t type) -{ - return (type_size(type) == 32) ? 0 : IR3_REG_HALF; -} - -static int emit_cat1(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat1_t *cat1 = ptr; - - iassert(instr->regs_count == 2); - iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); - iassert((src->flags & IR3_REG_IMMED) || - !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); - - if (src->flags & IR3_REG_IMMED) { - cat1->iim_val = src->iim_val; - cat1->src_im = 1; - } else if (src->flags & IR3_REG_RELATIV) { - cat1->off = src->offset; - cat1->src_rel = 1; - cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); - } else { - cat1->src = reg(src, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_R | - IR3_REG_CONST | IR3_REG_HALF); - cat1->src_c = !!(src->flags & IR3_REG_CONST); - } - - cat1->dst = reg(dst, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_EVEN | - IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); - cat1->repeat = instr->repeat; - cat1->src_r = !!(src->flags & IR3_REG_R); - cat1->ss = !!(instr->flags & IR3_INSTR_SS); - cat1->ul = !!(instr->flags & IR3_INSTR_UL); - cat1->dst_type = instr->cat1.dst_type; - cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); - cat1->src_type = instr->cat1.src_type; - cat1->even = !!(dst->flags & IR3_REG_EVEN); - cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); - cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat1->sync = !!(instr->flags & IR3_INSTR_SY); - cat1->opc_cat = 1; - - return 0; -} - -static int emit_cat2(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - instr_cat2_t *cat2 = ptr; - - iassert((instr->regs_count == 2) || (instr->regs_count == 3)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->num < (1 << 10)); - cat2->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); - cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat2->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat2->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - cat2->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat2->src1 = reg(src1, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - } - cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); - cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE); - cat2->src1_abs = !!(src1->flags & IR3_REG_ABS); - cat2->src1_r = !!(src1->flags & IR3_REG_R); - - if (src2) { - iassert((src2->flags & IR3_REG_IMMED) || - !((src1->flags ^ src2->flags) & IR3_REG_HALF)); - - if (src2->flags & IR3_REG_RELATIV) { - iassert(src2->num < (1 << 10)); - cat2->rel2.src2 = reg(src2, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); - cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); - cat2->rel2.src2_rel = 1; - } else if (src2->flags & IR3_REG_CONST) { - iassert(src2->num < (1 << 12)); - cat2->c2.src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - cat2->c2.src2_c = 1; - } else { - iassert(src2->num < (1 << 11)); - cat2->src2 = reg(src2, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - } - - cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); - cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE); - cat2->src2_abs = !!(src2->flags & IR3_REG_ABS); - cat2->src2_r = !!(src2->flags & IR3_REG_R); - } - - cat2->dst = reg(dst, info, instr->repeat, - IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); - cat2->repeat = instr->repeat; - cat2->ss = !!(instr->flags & IR3_INSTR_SS); - cat2->ul = !!(instr->flags & IR3_INSTR_UL); - cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); - cat2->ei = !!(dst->flags & IR3_REG_EI); - cat2->cond = instr->cat2.condition; - cat2->full = ! (src1->flags & IR3_REG_HALF); - cat2->opc = instr->opc; - cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat2->sync = !!(instr->flags & IR3_INSTR_SY); - cat2->opc_cat = 2; - - return 0; -} - -static int emit_cat3(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - instr_cat3_t *cat3 = ptr; - uint32_t src_flags = 0; - - switch (instr->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - src_flags |= IR3_REG_HALF; - break; - default: - break; - } - - iassert(instr->regs_count == 4); - iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->num < (1 << 10)); - cat3->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_R | IR3_REG_HALF); - cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat3->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat3->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R | - IR3_REG_HALF); - cat3->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat3->src1 = reg(src1, info, instr->repeat, - IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); - } - - cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE); - cat3->src1_r = !!(src1->flags & IR3_REG_R); - - cat3->src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_R | IR3_REG_HALF); - cat3->src2_c = !!(src2->flags & IR3_REG_CONST); - cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE); - cat3->src2_r = !!(src2->flags & IR3_REG_R); - - - if (src3->flags & IR3_REG_RELATIV) { - iassert(src3->num < (1 << 10)); - cat3->rel2.src3 = reg(src3, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_R | IR3_REG_HALF); - cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); - cat3->rel2.src3_rel = 1; - } else if (src3->flags & IR3_REG_CONST) { - iassert(src3->num < (1 << 12)); - cat3->c2.src3 = reg(src3, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R | - IR3_REG_HALF); - cat3->c2.src3_c = 1; - } else { - iassert(src3->num < (1 << 11)); - cat3->src3 = reg(src3, info, instr->repeat, - IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); - } - - cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE); - cat3->src3_r = !!(src3->flags & IR3_REG_R); - - cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat3->repeat = instr->repeat; - cat3->ss = !!(instr->flags & IR3_INSTR_SS); - cat3->ul = !!(instr->flags & IR3_INSTR_UL); - cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); - cat3->opc = instr->opc; - cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat3->sync = !!(instr->flags & IR3_INSTR_SY); - cat3->opc_cat = 3; - - return 0; -} - -static int emit_cat4(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat4_t *cat4 = ptr; - - iassert(instr->regs_count == 2); - - if (src->flags & IR3_REG_RELATIV) { - iassert(src->num < (1 << 10)); - cat4->rel.src = reg(src, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE | - IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); - cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); - cat4->rel.src_rel = 1; - } else if (src->flags & IR3_REG_CONST) { - iassert(src->num < (1 << 12)); - cat4->c.src = reg(src, info, instr->repeat, - IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - cat4->c.src_c = 1; - } else { - iassert(src->num < (1 << 11)); - cat4->src = reg(src, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS | - IR3_REG_R | IR3_REG_HALF); - } - - cat4->src_im = !!(src->flags & IR3_REG_IMMED); - cat4->src_neg = !!(src->flags & IR3_REG_NEGATE); - cat4->src_abs = !!(src->flags & IR3_REG_ABS); - cat4->src_r = !!(src->flags & IR3_REG_R); - - cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat4->repeat = instr->repeat; - cat4->ss = !!(instr->flags & IR3_INSTR_SS); - cat4->ul = !!(instr->flags & IR3_INSTR_UL); - cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); - cat4->full = ! (src->flags & IR3_REG_HALF); - cat4->opc = instr->opc; - cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat4->sync = !!(instr->flags & IR3_INSTR_SY); - cat4->opc_cat = 4; - - return 0; -} - -static int emit_cat5(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - instr_cat5_t *cat5 = ptr; - - iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); - - if (src1) { - cat5->full = ! (src1->flags & IR3_REG_HALF); - cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); - } - - - if (instr->flags & IR3_INSTR_S2EN) { - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - if (src3) { - iassert(src3->flags & IR3_REG_HALF); - cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); - } - iassert(!(instr->cat5.samp | instr->cat5.tex)); - } else { - iassert(!src3); - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - cat5->norm.samp = instr->cat5.samp; - cat5->norm.tex = instr->cat5.tex; - } - - cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat5->wrmask = dst->wrmask; - cat5->type = instr->cat5.type; - cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); - cat5->is_a = !!(instr->flags & IR3_INSTR_A); - cat5->is_s = !!(instr->flags & IR3_INSTR_S); - cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); - cat5->is_o = !!(instr->flags & IR3_INSTR_O); - cat5->is_p = !!(instr->flags & IR3_INSTR_P); - cat5->opc = instr->opc; - cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat5->sync = !!(instr->flags & IR3_INSTR_SY); - cat5->opc_cat = 5; - - return 0; -} - -static int emit_cat6(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat6_t *cat6 = ptr; - - iassert(instr->regs_count == 2); - - switch (instr->opc) { - /* load instructions: */ - case OPC_LDG: - case OPC_LDP: - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - case OPC_PREFETCH: { - instr_cat6a_t *cat6a = ptr; - - iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF)); - - cat6a->must_be_one1 = 1; - cat6a->must_be_one2 = 1; - cat6a->off = instr->cat6.offset; - cat6a->src = reg(src, info, instr->repeat, 0); - cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - break; - } - /* store instructions: */ - case OPC_STG: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_STI: { - instr_cat6b_t *cat6b = ptr; - uint32_t src_flags = type_flags(instr->cat6.type); - uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0; - - iassert(!((src->flags ^ src_flags) & IR3_REG_HALF)); - - cat6b->must_be_one1 = 1; - cat6b->must_be_one2 = 1; - cat6b->src = reg(src, info, instr->repeat, src_flags); - cat6b->off_hi = instr->cat6.offset >> 8; - cat6b->off = instr->cat6.offset; - cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags); - - break; - } - default: - // TODO - break; - } - - cat6->iim_val = instr->cat6.iim_val; - cat6->type = instr->cat6.type; - cat6->opc = instr->opc; - cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat6->sync = !!(instr->flags & IR3_INSTR_SY); - cat6->opc_cat = 6; - - return 0; -} - -static int (*emit[])(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) = { - emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, -}; - -void * ir3_assemble(struct ir3 *shader, struct ir3_info *info) -{ - uint32_t *ptr, *dwords; - uint32_t i; - - info->max_reg = -1; - info->max_half_reg = -1; - info->max_const = -1; - info->instrs_count = 0; - - /* need a integer number of instruction "groups" (sets of four - * instructions), so pad out w/ NOPs if needed: - * (each instruction is 64bits) - */ - info->sizedwords = 2 * align(shader->instrs_count, 4); - - ptr = dwords = calloc(1, 4 * info->sizedwords); - - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; - int ret = emit[instr->category](instr, dwords, info); - if (ret) - goto fail; - info->instrs_count += 1 + instr->repeat; - dwords += 2; - } - - return ptr; - -fail: - free(ptr); - return NULL; -} - -static struct ir3_register * reg_create(struct ir3 *shader, - int num, int flags) -{ - struct ir3_register *reg = - ir3_alloc(shader, sizeof(struct ir3_register)); - reg->wrmask = 1; - reg->flags = flags; - reg->num = num; - return reg; -} - -static void insert_instr(struct ir3 *shader, - struct ir3_instruction *instr) -{ -#ifdef DEBUG - static uint32_t serialno = 0; - instr->serialno = ++serialno; -#endif - if (shader->instrs_count == shader->instrs_sz) { - shader->instrs_sz = MAX2(2 * shader->instrs_sz, 16); - shader->instrs = realloc(shader->instrs, - shader->instrs_sz * sizeof(shader->instrs[0])); - } - shader->instrs[shader->instrs_count++] = instr; -} - -struct ir3_block * ir3_block_create(struct ir3 *shader, - unsigned ntmp, unsigned nin, unsigned nout) -{ - struct ir3_block *block; - unsigned size; - char *ptr; - - size = sizeof(*block); - size += sizeof(block->temporaries[0]) * ntmp; - size += sizeof(block->inputs[0]) * nin; - size += sizeof(block->outputs[0]) * nout; - - ptr = ir3_alloc(shader, size); - - block = (void *)ptr; - ptr += sizeof(*block); - - block->temporaries = (void *)ptr; - block->ntemporaries = ntmp; - ptr += sizeof(block->temporaries[0]) * ntmp; - - block->inputs = (void *)ptr; - block->ninputs = nin; - ptr += sizeof(block->inputs[0]) * nin; - - block->outputs = (void *)ptr; - block->noutputs = nout; - ptr += sizeof(block->outputs[0]) * nout; - - block->shader = shader; - - return block; -} - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc) -{ - struct ir3_instruction *instr = - ir3_alloc(block->shader, sizeof(struct ir3_instruction)); - instr->block = block; - instr->category = category; - instr->opc = opc; - insert_instr(block->shader, instr); - return instr; -} - -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) -{ - struct ir3_instruction *new_instr = - ir3_alloc(instr->block->shader, sizeof(struct ir3_instruction)); - unsigned i; - - *new_instr = *instr; - insert_instr(instr->block->shader, new_instr); - - /* clone registers: */ - new_instr->regs_count = 0; - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - struct ir3_register *new_reg = - ir3_reg_create(new_instr, reg->num, reg->flags); - *new_reg = *reg; - } - - return new_instr; -} - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags) -{ - struct ir3_register *reg = reg_create(instr->block->shader, num, flags); - assert(instr->regs_count < ARRAY_SIZE(instr->regs)); - instr->regs[instr->regs_count++] = reg; - return reg; -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h deleted file mode 100644 index 9ed914ba2e4..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3.h +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IR3_H_ -#define IR3_H_ - -#include <stdint.h> -#include <stdbool.h> - -#include "instr-a3xx.h" -#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ - -/* low level intermediate representation of an adreno shader program */ - -struct ir3; -struct ir3_instruction; -struct ir3_block; - -struct ir3 * fd_asm_parse(const char *src); - -struct ir3_info { - uint16_t sizedwords; - uint16_t instrs_count; /* expanded to account for rpt's */ - /* NOTE: max_reg, etc, does not include registers not touched - * by the shader (ie. vertex fetched via VFD_DECODE but not - * touched by shader) - */ - int8_t max_reg; /* highest GPR # used by shader */ - int8_t max_half_reg; - int8_t max_const; -}; - -struct ir3_register { - enum { - IR3_REG_CONST = 0x001, - IR3_REG_IMMED = 0x002, - IR3_REG_HALF = 0x004, - IR3_REG_RELATIV= 0x008, - IR3_REG_R = 0x010, - IR3_REG_NEGATE = 0x020, - IR3_REG_ABS = 0x040, - IR3_REG_EVEN = 0x080, - IR3_REG_POS_INF= 0x100, - /* (ei) flag, end-input? Set on last bary, presumably to signal - * that the shader needs no more input: - */ - IR3_REG_EI = 0x200, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_REG_SSA = 0x1000, /* 'instr' is ptr to assigning instr */ - IR3_REG_IA = 0x2000, /* meta-input dst is "assigned" */ - IR3_REG_ADDR = 0x4000, /* register is a0.x */ - } flags; - union { - /* normal registers: - * the component is in the low two bits of the reg #, so - * rN.x becomes: (N << 2) | x - */ - int num; - /* immediate: */ - int iim_val; - float fim_val; - /* relative: */ - int offset; - /* for IR3_REG_SSA, src registers contain ptr back to - * assigning instruction. - */ - struct ir3_instruction *instr; - }; - - /* used for cat5 instructions, but also for internal/IR level - * tracking of what registers are read/written by an instruction. - * wrmask may be a bad name since it is used to represent both - * src and dst that touch multiple adjacent registers. - */ - int wrmask; -}; - -struct ir3_instruction { - struct ir3_block *block; - int category; - opc_t opc; - enum { - /* (sy) flag is set on first instruction, and after sample - * instructions (probably just on RAW hazard). - */ - IR3_INSTR_SY = 0x001, - /* (ss) flag is set on first instruction, and first instruction - * to depend on the result of "long" instructions (RAW hazard): - * - * rcp, rsq, log2, exp2, sin, cos, sqrt - * - * It seems to synchronize until all in-flight instructions are - * completed, for example: - * - * rsq hr1.w, hr1.w - * add.f hr2.z, (neg)hr2.z, hc0.y - * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y - * rsq hr2.x, hr2.x - * (rpt1)nop - * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w - * nop - * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w - * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w - * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x - * - * The last mul.f does not have (ss) set, presumably because the - * (ss) on the previous instruction does the job. - * - * The blob driver also seems to set it on WAR hazards, although - * not really clear if this is needed or just blob compiler being - * sloppy. So far I haven't found a case where removing the (ss) - * causes problems for WAR hazard, but I could just be getting - * lucky: - * - * rcp r1.y, r3.y - * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z - * - */ - IR3_INSTR_SS = 0x002, - /* (jp) flag is set on jump targets: - */ - IR3_INSTR_JP = 0x004, - IR3_INSTR_UL = 0x008, - IR3_INSTR_3D = 0x010, - IR3_INSTR_A = 0x020, - IR3_INSTR_O = 0x040, - IR3_INSTR_P = 0x080, - IR3_INSTR_S = 0x100, - IR3_INSTR_S2EN = 0x200, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_INSTR_MARK = 0x1000, - } flags; - int repeat; - unsigned regs_count; - struct ir3_register *regs[5]; - union { - struct { - char inv; - char comp; - int immed; - } cat0; - struct { - type_t src_type, dst_type; - } cat1; - struct { - enum { - IR3_COND_LT = 0, - IR3_COND_LE = 1, - IR3_COND_GT = 2, - IR3_COND_GE = 3, - IR3_COND_EQ = 4, - IR3_COND_NE = 5, - } condition; - } cat2; - struct { - unsigned samp, tex; - type_t type; - } cat5; - struct { - type_t type; - int offset; - int iim_val; - } cat6; - /* for meta-instructions, just used to hold extra data - * before instruction scheduling, etc - */ - struct { - int off; /* component/offset */ - } fo; - struct { - struct ir3_block *if_block, *else_block; - } flow; - struct { - struct ir3_block *block; - } inout; - }; - - /* transient values used during various algorithms: */ - union { - /* The instruction depth is the max dependency distance to output. - * - * You can also think of it as the "cost", if we did any sort of - * optimization for register footprint. Ie. a value that is just - * result of moving a const to a reg would have a low cost, so to - * it could make sense to duplicate the instruction at various - * points where the result is needed to reduce register footprint. - */ - unsigned depth; - }; - struct ir3_instruction *next; -#ifdef DEBUG - uint32_t serialno; -#endif -}; - -struct ir3_heap_chunk; - -struct ir3 { - unsigned instrs_count, instrs_sz; - struct ir3_instruction **instrs; - unsigned heap_idx; - struct ir3_heap_chunk *chunk; -}; - -struct ir3_block { - struct ir3 *shader; - unsigned ntemporaries, ninputs, noutputs; - /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */ - struct ir3_instruction **temporaries; - struct ir3_instruction **inputs; - struct ir3_instruction **outputs; - /* only a single address register: */ - struct ir3_instruction *address; - struct ir3_block *parent; - struct ir3_instruction *head; -}; - -struct ir3 * ir3_create(void); -void ir3_destroy(struct ir3 *shader); -void * ir3_assemble(struct ir3 *shader, - struct ir3_info *info); -void * ir3_alloc(struct ir3 *shader, int sz); - -struct ir3_block * ir3_block_create(struct ir3 *shader, - unsigned ntmp, unsigned nin, unsigned nout); - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc); -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); -const char *ir3_instr_name(struct ir3_instruction *instr); - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags); - - -static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) -{ - if (instr->flags & IR3_INSTR_MARK) - return true; /* already visited */ - instr->flags ^= IR3_INSTR_MARK; - return false; -} - -static inline void ir3_clear_mark(struct ir3 *shader) -{ - /* TODO would be nice to drop the instruction array.. for - * new compiler, _clear_mark() is all we use it for, and - * we could probably manage a linked list instead.. - */ - unsigned i; - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; - instr->flags &= ~IR3_INSTR_MARK; - } -} - -static inline int ir3_instr_regno(struct ir3_instruction *instr, - struct ir3_register *reg) -{ - unsigned i; - for (i = 0; i < instr->regs_count; i++) - if (reg == instr->regs[i]) - return i; - return -1; -} - - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - -static inline uint32_t reg_num(struct ir3_register *reg) -{ - return reg->num >> 2; -} - -static inline uint32_t reg_comp(struct ir3_register *reg) -{ - return reg->num & 0x3; -} - -static inline bool is_flow(struct ir3_instruction *instr) -{ - return (instr->category == 0); -} - -static inline bool is_kill(struct ir3_instruction *instr) -{ - return is_flow(instr) && (instr->opc == OPC_KILL); -} - -static inline bool is_nop(struct ir3_instruction *instr) -{ - return is_flow(instr) && (instr->opc == OPC_NOP); -} - -static inline bool is_alu(struct ir3_instruction *instr) -{ - return (1 <= instr->category) && (instr->category <= 3); -} - -static inline bool is_sfu(struct ir3_instruction *instr) -{ - return (instr->category == 4); -} - -static inline bool is_tex(struct ir3_instruction *instr) -{ - return (instr->category == 5); -} - -static inline bool is_input(struct ir3_instruction *instr) -{ - return (instr->category == 2) && (instr->opc == OPC_BARY_F); -} - -static inline bool is_meta(struct ir3_instruction *instr) -{ - /* TODO how should we count PHI (and maybe fan-in/out) which - * might actually contribute some instructions to the final - * result? - */ - return (instr->category == -1); -} - -static inline bool is_addr(struct ir3_instruction *instr) -{ - return is_meta(instr) && (instr->opc == OPC_META_DEREF); -} - -static inline bool writes_addr(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return !!(dst->flags & IR3_REG_ADDR); - } - return false; -} - -static inline bool writes_pred(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return reg_num(dst) == REG_P0; - } - return false; -} - -static inline bool reg_gpr(struct ir3_register *r) -{ - if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_SSA | IR3_REG_ADDR)) - return false; - if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) - return false; - return true; -} - -/* dump: */ -#include <stdio.h> -void ir3_dump(struct ir3 *shader, const char *name, - struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */, - FILE *f); -void ir3_dump_instr_single(struct ir3_instruction *instr); -void ir3_dump_instr_list(struct ir3_instruction *instr); - -/* flatten if/else: */ -int ir3_block_flatten(struct ir3_block *block); - -/* depth calculation: */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n); -void ir3_block_depth(struct ir3_block *block); - -/* copy-propagate: */ -void ir3_block_cp(struct ir3_block *block); - -/* scheduling: */ -void ir3_block_sched(struct ir3_block *block); - -/* register assignment: */ -int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face, - bool *has_samp); - -#ifndef ARRAY_SIZE -# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#endif - -/* ************************************************************************* */ -/* split this out or find some helper to use.. like main/bitset.h.. */ - -#include <string.h> - -#define MAX_REG 256 - -typedef uint8_t regmask_t[2 * MAX_REG / 8]; - -static inline unsigned regmask_idx(struct ir3_register *reg) -{ - unsigned num = reg->num; - assert(num < MAX_REG); - if (reg->flags & IR3_REG_HALF) - num += MAX_REG; - return num; -} - -static inline void regmask_init(regmask_t *regmask) -{ - memset(regmask, 0, sizeof(*regmask)); -} - -static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - (*regmask)[idx / 8] |= 1 << (idx % 8); -} - -/* set bits in a if not set in b, conceptually: - * a |= (reg & ~b) - */ -static inline void regmask_set_if_not(regmask_t *a, - struct ir3_register *reg, regmask_t *b) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - if (!((*b)[idx / 8] & (1 << (idx % 8)))) - (*a)[idx / 8] |= 1 << (idx % 8); -} - -static inline unsigned regmask_get(regmask_t *regmask, - struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - if ((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - return false; -} - -/* ************************************************************************* */ - -#endif /* IR3_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_cp.c b/src/gallium/drivers/freedreno/a3xx/ir3_cp.c deleted file mode 100644 index 73c2a27c6eb..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_cp.c +++ /dev/null @@ -1,158 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include "ir3.h" - -/* - * Copy Propagate: - * - * TODO probably want some sort of visitor sort of interface to - * avoid duplicating the same graph traversal logic everywhere.. - * - */ - -static void block_cp(struct ir3_block *block); -static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep); - -static bool is_eligible_mov(struct ir3_instruction *instr) -{ - if ((instr->category == 1) && - (instr->cat1.src_type == instr->cat1.dst_type)) { - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - if (dst->flags & IR3_REG_ADDR) - return false; - if ((src->flags & IR3_REG_SSA) && - /* TODO: propagate abs/neg modifiers if possible */ - !(src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV))) - return true; - } - return false; -} - -static void walk_children(struct ir3_instruction *instr, bool keep) -{ - unsigned i; - - /* walk down the graph from each src: */ - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *src = instr->regs[i]; - if (src->flags & IR3_REG_SSA) - src->instr = instr_cp(src->instr, keep); - } -} - -static struct ir3_instruction * -instr_cp_fanin(struct ir3_instruction *instr) -{ - unsigned i; - - /* we need to handle fanin specially, to detect cases - * when we need to keep a mov - */ - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *src = instr->regs[i]; - if (src->flags & IR3_REG_SSA) { - struct ir3_instruction *cand = - instr_cp(src->instr, false); - - /* if the candidate is a fanout, then keep - * the move. - * - * This is a bit, um, fragile, but it should - * catch the extra mov's that the front-end - * puts in for us already in these cases. - */ - if (is_meta(cand) && (cand->opc == OPC_META_FO)) - cand = instr_cp(src->instr, true); - - src->instr = cand; - } - } - - walk_children(instr, false); - - return instr; - -} - -static struct ir3_instruction * -instr_cp(struct ir3_instruction *instr, bool keep) -{ - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) - return instr; - - if (is_meta(instr) && (instr->opc == OPC_META_FI)) - return instr_cp_fanin(instr); - - if (is_eligible_mov(instr) && !keep) { - struct ir3_register *src = instr->regs[1]; - return instr_cp(src->instr, false); - } - - walk_children(instr, false); - - return instr; -} - -static void block_cp(struct ir3_block *block) -{ - unsigned i, j; - - for (i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { - struct ir3_instruction *out = - instr_cp(block->outputs[i], false); - - /* To deal with things like this: - * - * 43: MOV OUT[2], TEMP[5] - * 44: MOV OUT[0], TEMP[5] - * - * we need to ensure that no two outputs point to - * the same instruction - */ - for (j = 0; j < i; j++) { - if (block->outputs[j] == out) { - out = instr_cp(block->outputs[i], true); - break; - } - } - - block->outputs[i] = out; - } - } -} - -void ir3_block_cp(struct ir3_block *block) -{ - ir3_clear_mark(block->shader); - block_cp(block); -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_depth.c b/src/gallium/drivers/freedreno/a3xx/ir3_depth.c deleted file mode 100644 index dcc0362f0c8..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_depth.c +++ /dev/null @@ -1,159 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include "util/u_math.h" - -#include "ir3.h" - -/* - * Instruction Depth: - * - * Calculates weighted instruction depth, ie. the sum of # of needed - * instructions plus delay slots back to original input (ie INPUT or - * CONST). That is to say, an instructions depth is: - * - * depth(instr) { - * d = 0; - * // for each src register: - * foreach (src in instr->regs[1..n]) - * d = max(d, delayslots(src->instr, n) + depth(src->instr)); - * return d + 1; - * } - * - * After an instruction's depth is calculated, it is inserted into the - * blocks depth sorted list, which is used by the scheduling pass. - */ - -/* calculate required # of delay slots between the instruction that - * assigns a value and the one that consumes - */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) -{ - /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal - * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch - * handled with sync bits - */ - - if (is_meta(assigner)) - return 0; - - if (writes_addr(assigner)) - return 6; - - /* handled via sync flags: */ - if (is_sfu(assigner) || is_tex(assigner)) - return 0; - - /* assigner must be alu: */ - if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { - return 6; - } else if ((consumer->category == 3) && - is_mad(consumer->opc) && (n == 2)) { - /* special case, 3rd src to cat3 not required on first cycle */ - return 1; - } else { - return 3; - } -} - -static void insert_by_depth(struct ir3_instruction *instr) -{ - struct ir3_block *block = instr->block; - struct ir3_instruction *n = block->head; - struct ir3_instruction *p = NULL; - - while (n && (n != instr) && (n->depth > instr->depth)) { - p = n; - n = n->next; - } - - instr->next = n; - if (p) - p->next = instr; - else - block->head = instr; -} - -static void ir3_instr_depth(struct ir3_instruction *instr) -{ - unsigned i; - - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) - return; - - instr->depth = 0; - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *src = instr->regs[i]; - if (src->flags & IR3_REG_SSA) { - unsigned sd; - - /* visit child to compute it's depth: */ - ir3_instr_depth(src->instr); - - sd = ir3_delayslots(src->instr, instr, i-1) + - src->instr->depth; - - instr->depth = MAX2(instr->depth, sd); - } - } - - /* meta-instructions don't add cycles, other than PHI.. which - * might translate to a real instruction.. - * - * well, not entirely true, fan-in/out, etc might need to need - * to generate some extra mov's in edge cases, etc.. probably - * we might want to do depth calculation considering the worst - * case for these?? - */ - if (!is_meta(instr)) - instr->depth++; - - insert_by_depth(instr); -} - -void ir3_block_depth(struct ir3_block *block) -{ - unsigned i; - - block->head = NULL; - - ir3_clear_mark(block->shader); - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i]) - ir3_instr_depth(block->outputs[i]); - - /* at this point, any unvisited input is unused: */ - for (i = 0; i < block->ninputs; i++) { - struct ir3_instruction *in = block->inputs[i]; - if (in && !ir3_instr_check_mark(in)) - block->inputs[i] = NULL; - } -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c deleted file mode 100644 index 1a6f49d51cd..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c +++ /dev/null @@ -1,425 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include <stdarg.h> - -#include "ir3.h" - -#define PTRID(x) ((unsigned long)(x)) - -struct ir3_dump_ctx { - FILE *f; - bool verbose; -}; - -static void dump_instr_name(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr) -{ - /* for debugging: */ - if (ctx->verbose) { -#ifdef DEBUG - fprintf(ctx->f, "%04u:", instr->serialno); -#endif - fprintf(ctx->f, "%03u: ", instr->depth); - } - - if (instr->flags & IR3_INSTR_SY) - fprintf(ctx->f, "(sy)"); - if (instr->flags & IR3_INSTR_SS) - fprintf(ctx->f, "(ss)"); - - if (is_meta(instr)) { - switch(instr->opc) { - case OPC_META_PHI: - fprintf(ctx->f, "Φ"); - break; - case OPC_META_DEREF: - fprintf(ctx->f, "(*)"); - break; - default: - /* shouldn't hit here.. just for debugging: */ - switch (instr->opc) { - case OPC_META_INPUT: fprintf(ctx->f, "_meta:in"); break; - case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out"); break; - case OPC_META_FO: fprintf(ctx->f, "_meta:fo"); break; - case OPC_META_FI: fprintf(ctx->f, "_meta:fi"); break; - case OPC_META_FLOW: fprintf(ctx->f, "_meta:flow"); break; - - default: fprintf(ctx->f, "_meta:%d", instr->opc); break; - } - break; - } - } else if (instr->category == 1) { - static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", - }; - if (instr->cat1.src_type == instr->cat1.dst_type) - fprintf(ctx->f, "mov"); - else - fprintf(ctx->f, "cov"); - fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); - } else { - fprintf(ctx->f, "%s", ir3_instr_name(instr)); - if (instr->flags & IR3_INSTR_3D) - fprintf(ctx->f, ".3d"); - if (instr->flags & IR3_INSTR_A) - fprintf(ctx->f, ".a"); - if (instr->flags & IR3_INSTR_O) - fprintf(ctx->f, ".o"); - if (instr->flags & IR3_INSTR_P) - fprintf(ctx->f, ".p"); - if (instr->flags & IR3_INSTR_S) - fprintf(ctx->f, ".s"); - if (instr->flags & IR3_INSTR_S2EN) - fprintf(ctx->f, ".s2en"); - } -} - -static void dump_reg_name(struct ir3_dump_ctx *ctx, - struct ir3_register *reg) -{ - if ((reg->flags & IR3_REG_ABS) && (reg->flags & IR3_REG_NEGATE)) - fprintf(ctx->f, "(absneg)"); - else if (reg->flags & IR3_REG_NEGATE) - fprintf(ctx->f, "(neg)"); - else if (reg->flags & IR3_REG_ABS) - fprintf(ctx->f, "(abs)"); - - if (reg->flags & IR3_REG_IMMED) { - fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); - } else if (reg->flags & IR3_REG_SSA) { - if (ctx->verbose) { - fprintf(ctx->f, "_["); - dump_instr_name(ctx, reg->instr); - fprintf(ctx->f, "]"); - } - } else { - if (reg->flags & IR3_REG_HALF) - fprintf(ctx->f, "h"); - if (reg->flags & IR3_REG_CONST) - fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); - else - fprintf(ctx->f, "r%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); - } -} - -static void ir3_instr_dump(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr); -static void ir3_block_dump(struct ir3_dump_ctx *ctx, - struct ir3_block *block, const char *name); - -static void dump_instr(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr) -{ - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) - return; - - /* some meta-instructions need to be handled specially: */ - if (is_meta(instr)) { - if ((instr->opc == OPC_META_FO) || - (instr->opc == OPC_META_FI)) { - unsigned i; - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if (reg->flags & IR3_REG_SSA) - dump_instr(ctx, reg->instr); - } - } else if (instr->opc == OPC_META_FLOW) { - struct ir3_register *reg = instr->regs[1]; - ir3_block_dump(ctx, instr->flow.if_block, "if"); - if (instr->flow.else_block) - ir3_block_dump(ctx, instr->flow.else_block, "else"); - if (reg->flags & IR3_REG_SSA) - dump_instr(ctx, reg->instr); - } else if ((instr->opc == OPC_META_PHI) || - (instr->opc == OPC_META_DEREF)) { - /* treat like a normal instruction: */ - ir3_instr_dump(ctx, instr); - } - } else { - ir3_instr_dump(ctx, instr); - } -} - -/* arrarraggh! if link is to something outside of the current block, we - * need to defer emitting the link until the end of the block, since the - * edge triggers pre-creation of the node it links to inside the cluster, - * even though it is meant to be outside.. - */ -static struct { - char buf[40960]; - unsigned n; -} edge_buf; - -/* helper to print or defer: */ -static void printdef(struct ir3_dump_ctx *ctx, - bool defer, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - if (defer) { - unsigned n = edge_buf.n; - n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n, - fmt, ap); - edge_buf.n = n; - } else { - vfprintf(ctx->f, fmt, ap); - } - va_end(ap); -} - -static void dump_link2(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr, const char *target, bool defer) -{ - /* some meta-instructions need to be handled specially: */ - if (is_meta(instr)) { - if (instr->opc == OPC_META_INPUT) { - printdef(ctx, defer, "input%lx:<in%u>:w -> %s", - PTRID(instr->inout.block), - instr->regs[0]->num, target); - } else if (instr->opc == OPC_META_FO) { - struct ir3_register *reg = instr->regs[1]; - dump_link2(ctx, reg->instr, target, defer); - printdef(ctx, defer, "[label=\".%c\"]", - "xyzw"[instr->fo.off & 0x3]); - } else if (instr->opc == OPC_META_FI) { - unsigned i; - - /* recursively dump all parents and links */ - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if (reg->flags & IR3_REG_SSA) { - dump_link2(ctx, reg->instr, target, defer); - printdef(ctx, defer, "[label=\".%c\"]", - "xyzw"[(i - 1) & 0x3]); - } - } - } else if (instr->opc == OPC_META_OUTPUT) { - printdef(ctx, defer, "output%lx:<out%u>:w -> %s", - PTRID(instr->inout.block), - instr->regs[0]->num, target); - } else if ((instr->opc == OPC_META_PHI) || - (instr->opc == OPC_META_DEREF)) { - /* treat like a normal instruction: */ - printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target); - } - } else { - printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target); - } -} - -static void dump_link(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr, - struct ir3_block *block, const char *target) -{ - bool defer = instr->block != block; - dump_link2(ctx, instr, target, defer); - printdef(ctx, defer, "\n"); -} - -static struct ir3_register *follow_flow(struct ir3_register *reg) -{ - if (reg->flags & IR3_REG_SSA) { - struct ir3_instruction *instr = reg->instr; - /* go with the flow.. */ - if (is_meta(instr) && (instr->opc == OPC_META_FLOW)) - return instr->regs[1]; - } - return reg; -} - -static void ir3_instr_dump(struct ir3_dump_ctx *ctx, - struct ir3_instruction *instr) -{ - unsigned i; - - fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{", - PTRID(instr)); - dump_instr_name(ctx, instr); - - /* destination register: */ - fprintf(ctx->f, "|<dst0>"); - - /* source register(s): */ - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = follow_flow(instr->regs[i]); - - fprintf(ctx->f, "|"); - - if (reg->flags & IR3_REG_SSA) - fprintf(ctx->f, "<src%u> ", (i - 1)); - - dump_reg_name(ctx, reg); - } - - fprintf(ctx->f, "}\"];\n"); - - /* and recursively dump dependent instructions: */ - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - char target[32]; /* link target */ - - if (!(reg->flags & IR3_REG_SSA)) - continue; - - snprintf(target, sizeof(target), "instr%lx:<src%u>", - PTRID(instr), (i - 1)); - - dump_instr(ctx, reg->instr); - dump_link(ctx, follow_flow(reg)->instr, instr->block, target); - } -} - -static void ir3_block_dump(struct ir3_dump_ctx *ctx, - struct ir3_block *block, const char *name) -{ - unsigned i, n; - - n = edge_buf.n; - - fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block)); - fprintf(ctx->f, "label=\"%s\";\n", name); - - /* draw inputs: */ - fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", PTRID(block)); - for (i = 0; i < block->ninputs; i++) - if (block->inputs[i]) - fprintf(ctx->f, "|<in%u> i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]); - fprintf(ctx->f, "\"];\n"); - - /* draw instruction graph: */ - for (i = 0; i < block->noutputs; i++) - dump_instr(ctx, block->outputs[i]); - - /* draw outputs: */ - fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block)); - for (i = 0; i < block->noutputs; i++) - fprintf(ctx->f, "|<out%u> o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]); - fprintf(ctx->f, "\"];\n"); - - /* and links to outputs: */ - for (i = 0; i < block->noutputs; i++) { - char target[32]; /* link target */ - - /* NOTE: there could be outputs that are never assigned, - * so skip them - */ - if (!block->outputs[i]) - continue; - - snprintf(target, sizeof(target), "output%lx:<out%u>:e", - PTRID(block), i); - - dump_link(ctx, block->outputs[i], block, target); - } - - fprintf(ctx->f, "}\n"); - - /* and links to inputs: */ - if (block->parent) { - for (i = 0; i < block->ninputs; i++) { - char target[32]; /* link target */ - - if (!block->inputs[i]) - continue; - - dump_instr(ctx, block->inputs[i]); - - snprintf(target, sizeof(target), "input%lx:<in%u>:e", - PTRID(block), i); - - dump_link(ctx, block->inputs[i], block, target); - } - } - - /* dump deferred edges: */ - if (edge_buf.n > n) { - fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]); - edge_buf.n = n; - } -} - -void ir3_dump(struct ir3 *shader, const char *name, - struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */, - FILE *f) -{ - struct ir3_dump_ctx ctx = { - .f = f, - }; - ir3_clear_mark(shader); - fprintf(ctx.f, "digraph G {\n"); - fprintf(ctx.f, "rankdir=RL;\n"); - fprintf(ctx.f, "nodesep=0.25;\n"); - fprintf(ctx.f, "ranksep=1.5;\n"); - ir3_block_dump(&ctx, block, name); - fprintf(ctx.f, "}\n"); -} - -/* - * For Debugging: - */ - -void -ir3_dump_instr_single(struct ir3_instruction *instr) -{ - struct ir3_dump_ctx ctx = { - .f = stdout, - .verbose = true, - }; - unsigned i; - - dump_instr_name(&ctx, instr); - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - printf(i ? ", " : " "); - dump_reg_name(&ctx, reg); - } - printf("\n"); -} - -void -ir3_dump_instr_list(struct ir3_instruction *instr) -{ - unsigned n = 0; - - while (instr) { - ir3_dump_instr_single(instr); - if (!is_meta(instr)) - n++; - instr = instr->next; - } - printf("%u instructions\n", n); -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_flatten.c b/src/gallium/drivers/freedreno/a3xx/ir3_flatten.c deleted file mode 100644 index 9389227034c..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_flatten.c +++ /dev/null @@ -1,155 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include <stdarg.h> - -#include "ir3.h" - -/* - * Flatten: flatten out legs of if/else, etc - * - * TODO probably should use some heuristic to decide to not flatten - * if one side of the other is too large / deeply nested / whatever? - */ - -struct ir3_flatten_ctx { - struct ir3_block *block; - unsigned cnt; -}; - -static struct ir3_register *unwrap(struct ir3_register *reg) -{ - - if (reg->flags & IR3_REG_SSA) { - struct ir3_instruction *instr = reg->instr; - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_OUTPUT: - case OPC_META_FLOW: - if (instr->regs_count > 1) - return instr->regs[1]; - return NULL; - default: - break; - } - } - } - return reg; -} - -static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx, - struct ir3_instruction *instr) -{ - unsigned i; - - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) - return; - - instr->block = ctx->block; - - /* TODO: maybe some threshold to decide whether to - * flatten or not?? - */ - if (is_meta(instr)) { - if (instr->opc == OPC_META_PHI) { - struct ir3_register *cond, *t, *f; - - cond = unwrap(instr->regs[1]); - t = unwrap(instr->regs[2]); /* true val */ - f = unwrap(instr->regs[3]); /* false val */ - - /* must have cond, but t or f may be null if only written - * one one side of the if/else (in which case we can just - * convert the PHI to a simple move). - */ - assert(cond); - assert(t || f); - - if (t && f) { - /* convert the PHI instruction to sel.{b16,b32} */ - instr->category = 3; - - /* instruction type based on dst size: */ - if (instr->regs[0]->flags & IR3_REG_HALF) - instr->opc = OPC_SEL_B16; - else - instr->opc = OPC_SEL_B32; - - instr->regs[1] = t; - instr->regs[2] = cond; - instr->regs[3] = f; - } else { - /* convert to simple mov: */ - instr->category = 1; - instr->cat1.dst_type = TYPE_F32; - instr->cat1.src_type = TYPE_F32; - instr->regs_count = 2; - instr->regs[1] = t ? t : f; - } - - ctx->cnt++; - } else if ((instr->opc == OPC_META_INPUT) && - (instr->regs_count == 2)) { - type_t ftype; - - if (instr->regs[0]->flags & IR3_REG_HALF) - ftype = TYPE_F16; - else - ftype = TYPE_F32; - - /* convert meta:input to mov: */ - instr->category = 1; - instr->cat1.src_type = ftype; - instr->cat1.dst_type = ftype; - } - } - - /* recursively visit children: */ - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *src = instr->regs[i]; - if (src->flags & IR3_REG_SSA) - ir3_instr_flatten(ctx, src->instr); - } -} - -/* return >= 0 is # of phi's flattened, < 0 is error */ -int ir3_block_flatten(struct ir3_block *block) -{ - struct ir3_flatten_ctx ctx = { - .block = block, - }; - unsigned i; - - ir3_clear_mark(block->shader); - for(i = 0; i < block->noutputs; i++) - if (block->outputs[i]) - ir3_instr_flatten(&ctx, block->outputs[i]); - - return ctx.cnt; -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c deleted file mode 100644 index b916dd51393..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ /dev/null @@ -1,790 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" - -#include "ir3.h" -#include "ir3_visitor.h" - -/* - * Register Assignment: - * - * NOTE: currently only works on a single basic block.. need to think - * about how multiple basic blocks are going to get scheduled. But - * I think I want to re-arrange how blocks work, ie. get rid of the - * block nesting thing.. - * - * NOTE: we could do register coalescing (eliminate moves) as part of - * the RA step.. OTOH I think we need to do scheduling before register - * assignment. And if we remove a mov that effects scheduling (unless - * we leave a placeholder nop, which seems lame), so I'm not really - * sure how practical this is to do both in a single stage. But OTOH - * I'm not really sure a sane way for the CP stage to realize when it - * cannot remove a mov due to multi-register constraints.. - * - */ - -struct ir3_ra_ctx { - struct ir3_block *block; - enum shader_t type; - bool half_precision; - bool frag_coord; - bool frag_face; - bool has_samp; - int cnt; - bool error; -}; - -/* sorta ugly way to retrofit half-precision support.. rather than - * passing extra param around, just OR in a high bit. All the low - * value arithmetic (ie. +/- offset within a contiguous vec4, etc) - * will continue to work as long as you don't underflow (and that - * would go badly anyways). - */ -#define REG_HALF 0x8000 - -struct ir3_ra_assignment { - int8_t off; /* offset of instruction dst within range */ - uint8_t num; /* number of components for the range */ -}; - -static void ra_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *assigner, int num); -static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr); - -/* - * Register Allocation: - */ - -#define REG(n, wm, f) (struct ir3_register){ \ - .flags = (f), \ - .num = (n), \ - .wrmask = TGSI_WRITEMASK_ ## wm, \ - } - -/* check that the register exists, is a GPR and is not special (a0/p0) */ -static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n) -{ - if ((n < instr->regs_count) && reg_gpr(instr->regs[n])) - return instr->regs[n]; - return NULL; -} - -static int output_base(struct ir3_ra_ctx *ctx) -{ - /* ugg, for fragment shader we need to have input at r0.x - * (or at least if there is a way to configure it, I can't - * see how because the blob driver always uses r0.x (ie. - * all zeros) - */ - if (ctx->type == SHADER_FRAGMENT) { - if (ctx->half_precision) - return ctx->frag_face ? 4 : 3; - return ctx->frag_coord ? 8 : 4; - } - return 0; -} - -/* live means read before written */ -static void compute_liveregs(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, regmask_t *liveregs) -{ - struct ir3_block *block = instr->block; - regmask_t written; - unsigned i, j; - - regmask_init(liveregs); - regmask_init(&written); - - for (instr = instr->next; instr; instr = instr->next) { - struct ir3_register *r; - - if (is_meta(instr)) - continue; - - /* check first src's read: */ - for (j = 1; j < instr->regs_count; j++) { - r = reg_check(instr, j); - if (r) - regmask_set_if_not(liveregs, r, &written); - } - - /* then dst written (if assigned already): */ - if (instr->flags & IR3_INSTR_MARK) { - r = reg_check(instr, 0); - if (r) - regmask_set(&written, r); - } - } - - /* be sure to account for output registers too: */ - for (i = 0; i < block->noutputs; i++) { - struct ir3_register reg = REG(output_base(ctx) + i, X, 0); - regmask_set_if_not(liveregs, ®, &written); - } -} - -/* calculate registers that are clobbered before last use of 'assigner'. - * This needs to be done backwards, although it could possibly be - * combined into compute_liveregs(). (Ie. compute_liveregs() could - * reverse the list, then do this part backwards reversing the list - * again back to original order.) Otoh, probably I should try to - * construct a proper interference graph instead. - * - * XXX this need to follow the same recursion path that is used for - * to rename/assign registers (ie. ra_assign_src()).. this is a bit - * ugly right now, maybe refactor into node iterator sort of things - * that iterates nodes in the correct order? - */ -static bool compute_clobbers(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, struct ir3_instruction *assigner, - regmask_t *liveregs) -{ - unsigned i; - bool live = false, was_live = false; - - if (instr == NULL) { - struct ir3_block *block = ctx->block; - - /* if at the end, check outputs: */ - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i] == assigner) - return true; - return false; - } - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) { - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - // TODO - assert(0); - break; - case OPC_META_FO: - case OPC_META_FI: - was_live |= compute_clobbers(ctx, instr->next, - instr, liveregs); - break; - default: - break; - } - } - live = true; - break; - } - } - - was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs); - - if (was_live && (instr->regs_count > 0) && - (instr->flags & IR3_INSTR_MARK) && - !is_meta(instr)) - regmask_set(liveregs, instr->regs[0]); - - return live || was_live; -} - -static int find_available(regmask_t *liveregs, int size, bool half) -{ - unsigned i; - unsigned f = half ? IR3_REG_HALF : 0; - for (i = 0; i < MAX_REG - size; i++) { - if (!regmask_get(liveregs, ®(i, X, f))) { - unsigned start = i++; - for (; (i < MAX_REG) && ((i - start) < size); i++) - if (regmask_get(liveregs, ®(i, X, f))) - break; - if ((i - start) >= size) - return start; - } - } - assert(0); - return -1; -} - -static int alloc_block(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, int size) -{ - if (!instr) { - /* special case, allocating shader outputs. At this - * point, nothing is allocated, just start the shader - * outputs at r0.x and let compute_liveregs() take - * care of the rest from here: - */ - return 0; - } else { - struct ir3_register *dst = instr->regs[0]; - regmask_t liveregs; - - compute_liveregs(ctx, instr, &liveregs); - - // XXX XXX XXX XXX XXX XXX XXX XXX XXX - // XXX hack.. maybe ra_calc should give us a list of - // instrs to compute_clobbers() on? - if (is_meta(instr) && (instr->opc == OPC_META_INPUT) && - (instr->regs_count == 1)) { - unsigned i, base = instr->regs[0]->num & ~0x3; - for (i = 0; i < 4; i++) { - struct ir3_instruction *in = ctx->block->inputs[base + i]; - if (in) - compute_clobbers(ctx, in->next, in, &liveregs); - } - } else - // XXX XXX XXX XXX XXX XXX XXX XXX XXX - compute_clobbers(ctx, instr->next, instr, &liveregs); - - return find_available(&liveregs, size, - !!(dst->flags & IR3_REG_HALF)); - } -} - -/* - * Constraint Calculation: - */ - -struct ra_calc_visitor { - struct ir3_visitor base; - struct ir3_ra_assignment a; -}; - -static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v) -{ - return (struct ra_calc_visitor *)v; -} - -/* calculate register assignment for the instruction. If the register - * written by this instruction is required to be part of a range, to - * handle other (input/output/sam/bary.f/etc) contiguous register range - * constraints, that is calculated handled here. - */ -static void ra_calc_dst(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - if (is_tex(instr)) { - c->a.off = 0; - c->a.num = 4; - } else { - c->a.off = 0; - c->a.num = 1; - } -} - -static void -ra_calc_dst_shader_input(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - struct ir3_block *block = instr->block; - struct ir3_register *dst = instr->regs[0]; - unsigned base = dst->num & ~0x3; - unsigned i, num = 0; - - assert(!(dst->flags & IR3_REG_IA)); - - /* check what input components we need: */ - for (i = 0; i < 4; i++) { - unsigned idx = base + i; - if ((idx < block->ninputs) && block->inputs[idx]) - num = i + 1; - } - - c->a.off = dst->num - base; - c->a.num = num; -} - -static void ra_calc_src_fanin(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - unsigned srcn = ir3_instr_regno(instr, reg) - 1; - c->a.off += srcn; - c->a.num += srcn; - c->a.num = MAX2(c->a.num, instr->regs_count - 1); -} - -static const struct ir3_visitor_funcs calc_visitor_funcs = { - .instr = ir3_visit_instr, - .dst_shader_input = ra_calc_dst_shader_input, - .dst_fanout = ra_calc_dst, - .dst_fanin = ra_calc_dst, - .dst = ra_calc_dst, - .src_fanout = ir3_visit_reg, - .src_fanin = ra_calc_src_fanin, - .src = ir3_visit_reg, -}; - -static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner) -{ - struct ra_calc_visitor v = { - .base.funcs = &calc_visitor_funcs, - }; - - ir3_visit_instr(&v.base, assigner); - - return v.a; -} - -/* - * Register Assignment: - */ - -struct ra_assign_visitor { - struct ir3_visitor base; - struct ir3_ra_ctx *ctx; - int num; -}; - -static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v) -{ - return (struct ra_assign_visitor *)v; -} - -static type_t half_type(type_t type) -{ - switch (type) { - case TYPE_F32: return TYPE_F16; - case TYPE_U32: return TYPE_U16; - case TYPE_S32: return TYPE_S16; - /* instructions may already be fixed up: */ - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return type; - default: - assert(0); - return ~0; - } -} - -/* some instructions need fix-up if dst register is half precision: */ -static void fixup_half_instr_dst(struct ir3_instruction *instr) -{ - switch (instr->category) { - case 1: /* move instructions */ - instr->cat1.dst_type = half_type(instr->cat1.dst_type); - break; - case 3: - switch (instr->opc) { - case OPC_MAD_F32: - instr->opc = OPC_MAD_F16; - break; - case OPC_SEL_B32: - instr->opc = OPC_SEL_B16; - break; - case OPC_SEL_S32: - instr->opc = OPC_SEL_S16; - break; - case OPC_SEL_F32: - instr->opc = OPC_SEL_F16; - break; - case OPC_SAD_S32: - instr->opc = OPC_SAD_S16; - break; - /* instructions may already be fixed up: */ - case OPC_MAD_F16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - break; - default: - assert(0); - break; - } - break; - case 5: - instr->cat5.type = half_type(instr->cat5.type); - break; - } -} -/* some instructions need fix-up if src register is half precision: */ -static void fixup_half_instr_src(struct ir3_instruction *instr) -{ - switch (instr->category) { - case 1: /* move instructions */ - instr->cat1.src_type = half_type(instr->cat1.src_type); - break; - } -} - -static void ra_assign_reg(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - - if (is_flow(instr) && (instr->opc == OPC_KILL)) - return; - - reg->flags &= ~IR3_REG_SSA; - reg->num = a->num & ~REG_HALF; - - assert(reg->num >= 0); - - if (a->num & REG_HALF) { - reg->flags |= IR3_REG_HALF; - /* if dst reg being assigned, patch up the instr: */ - if (reg == instr->regs[0]) - fixup_half_instr_dst(instr); - else - fixup_half_instr_src(instr); - } -} - -static void ra_assign_dst_shader_input(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - unsigned i, base = reg->num & ~0x3; - int off = base - reg->num; - - ra_assign_reg(v, instr, reg); - reg->flags |= IR3_REG_IA; - - /* trigger assignment of all our companion input components: */ - for (i = 0; i < 4; i++) { - struct ir3_instruction *in = instr->block->inputs[i+base]; - if (in && is_meta(in) && (in->opc == OPC_META_INPUT)) - ra_assign(a->ctx, in, a->num + off + i); - } -} - -static void ra_assign_dst_fanout(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - struct ir3_register *src = instr->regs[1]; - ra_assign_reg(v, instr, reg); - if (src->flags & IR3_REG_SSA) - ra_assign(a->ctx, src->instr, a->num - instr->fo.off); -} - -static void ra_assign_src_fanout(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - ra_assign_reg(v, instr, reg); - ra_assign(a->ctx, instr, a->num + instr->fo.off); -} - - -static void ra_assign_src_fanin(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - unsigned j, srcn = ir3_instr_regno(instr, reg) - 1; - ra_assign_reg(v, instr, reg); - ra_assign(a->ctx, instr, a->num - srcn); - for (j = 1; j < instr->regs_count; j++) { - struct ir3_register *reg = instr->regs[j]; - if (reg->flags & IR3_REG_SSA) /* could be renamed already */ - ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1); - } -} - -static const struct ir3_visitor_funcs assign_visitor_funcs = { - .instr = ir3_visit_instr, - .dst_shader_input = ra_assign_dst_shader_input, - .dst_fanout = ra_assign_dst_fanout, - .dst_fanin = ra_assign_reg, - .dst = ra_assign_reg, - .src_fanout = ra_assign_src_fanout, - .src_fanin = ra_assign_src_fanin, - .src = ra_assign_reg, -}; - -static void ra_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *assigner, int num) -{ - struct ra_assign_visitor v = { - .base.funcs = &assign_visitor_funcs, - .ctx = ctx, - .num = num, - }; - - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(assigner)) { - debug_assert(assigner->regs[0]->num == (num & ~REG_HALF)); - if (assigner->regs[0]->num != (num & ~REG_HALF)) { - /* impossible situation, should have been resolved - * at an earlier stage by inserting extra mov's: - */ - ctx->error = true; - } - return; - } - - ir3_visit_instr(&v.base, assigner); -} - -/* - * - */ - -static void ir3_instr_ra(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_register *dst; - unsigned num; - - /* skip over nop's */ - if (instr->regs_count == 0) - return; - - dst = instr->regs[0]; - - /* if we've already visited this instruction, bail now: */ - if (instr->flags & IR3_INSTR_MARK) - return; - - /* allocate register(s): */ - if (is_addr(instr)) { - num = instr->regs[2]->num; - } else if (reg_gpr(dst)) { - struct ir3_ra_assignment a; - a = ra_calc(instr); - num = alloc_block(ctx, instr, a.num) + a.off; - } else if (dst->flags & IR3_REG_ADDR) { - dst->flags &= ~IR3_REG_ADDR; - num = regid(REG_A0, 0) | REG_HALF; - } else { - /* predicate register (p0).. etc */ - return; - } - - ra_assign(ctx, instr, num); -} - -/* flatten into shader: */ -// XXX this should probably be somewhere else: -static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - struct ir3_instruction *n; - struct ir3 *shader = block->shader; - struct ir3_instruction *end = - ir3_instr_create(block, 0, OPC_END); - struct ir3_instruction *last_input = NULL; - struct ir3_instruction *last_rel = NULL; - regmask_t needs_ss_war; /* write after read */ - regmask_t needs_ss; - regmask_t needs_sy; - - regmask_init(&needs_ss_war); - regmask_init(&needs_ss); - regmask_init(&needs_sy); - - shader->instrs_count = 0; - - for (n = block->head; n; n = n->next) { - struct ir3_register *reg; - unsigned i; - - if (is_meta(n)) - continue; - - for (i = 1; i < n->regs_count; i++) { - reg = n->regs[i]; - - if (reg_gpr(reg)) { - - /* TODO: we probably only need (ss) for alu - * instr consuming sfu result.. need to make - * some tests for both this and (sy).. - */ - if (regmask_get(&needs_ss, reg)) { - n->flags |= IR3_INSTR_SS; - regmask_init(&needs_ss); - } - - if (regmask_get(&needs_sy, reg)) { - n->flags |= IR3_INSTR_SY; - regmask_init(&needs_sy); - } - } - - /* TODO: is it valid to have address reg loaded from a - * relative src (ie. mova a0, c<a0.x+4>)? If so, the - * last_rel check below should be moved ahead of this: - */ - if (reg->flags & IR3_REG_RELATIV) - last_rel = n; - } - - if (n->regs_count > 0) { - reg = n->regs[0]; - if (regmask_get(&needs_ss_war, reg)) { - n->flags |= IR3_INSTR_SS; - regmask_init(&needs_ss_war); // ??? I assume? - } - - if (last_rel && (reg->num == regid(REG_A0, 0))) { - last_rel->flags |= IR3_INSTR_UL; - last_rel = NULL; - } - } - - /* cat5+ does not have an (ss) bit, if needed we need to - * insert a nop to carry the sync flag. Would be kinda - * clever if we were aware of this during scheduling, but - * this should be a pretty rare case: - */ - if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { - struct ir3_instruction *nop; - nop = ir3_instr_create(block, 0, OPC_NOP); - nop->flags |= IR3_INSTR_SS; - n->flags &= ~IR3_INSTR_SS; - } - - /* need to be able to set (ss) on first instruction: */ - if ((shader->instrs_count == 0) && (n->category >= 5)) - ir3_instr_create(block, 0, OPC_NOP); - - if (is_nop(n) && shader->instrs_count) { - struct ir3_instruction *last = - shader->instrs[shader->instrs_count-1]; - if (is_nop(last) && (last->repeat < 5)) { - last->repeat++; - last->flags |= n->flags; - continue; - } - } - - shader->instrs[shader->instrs_count++] = n; - - if (is_sfu(n)) - regmask_set(&needs_ss, n->regs[0]); - - if (is_tex(n)) { - /* this ends up being the # of samp instructions.. but that - * is ok, everything else only cares whether it is zero or - * not. We do this here, rather than when we encounter a - * SAMP decl, because (especially in binning pass shader) - * the samp instruction(s) could get eliminated if the - * result is not used. - */ - ctx->has_samp = true; - regmask_set(&needs_sy, n->regs[0]); - } - - /* both tex/sfu appear to not always immediately consume - * their src register(s): - */ - if (is_tex(n) || is_sfu(n)) { - for (i = 1; i < n->regs_count; i++) { - reg = n->regs[i]; - if (reg_gpr(reg)) - regmask_set(&needs_ss_war, reg); - } - } - - if (is_input(n)) - last_input = n; - } - - if (last_input) - last_input->regs[0]->flags |= IR3_REG_EI; - - if (last_rel) - last_rel->flags |= IR3_INSTR_UL; - - shader->instrs[shader->instrs_count++] = end; - - shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; -} - -static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - struct ir3_instruction *n; - - if (!block->parent) { - unsigned i, j; - int base, off = output_base(ctx); - - base = alloc_block(ctx, NULL, block->noutputs + off); - - if (ctx->half_precision) - base |= REG_HALF; - - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i] && !is_kill(block->outputs[i])) - ra_assign(ctx, block->outputs[i], base + i + off); - - if (ctx->type == SHADER_FRAGMENT) { - i = 0; - if (ctx->frag_face) { - /* if we have frag_face, it gets hr0.x */ - ra_assign(ctx, block->inputs[i], REG_HALF | 0); - i += 4; - } - for (j = 0; i < block->ninputs; i++, j++) - if (block->inputs[i]) - ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j); - } else { - for (i = 0; i < block->ninputs; i++) - if (block->inputs[i]) - ir3_instr_ra(ctx, block->inputs[i]); - } - } - - /* then loop over instruction list and assign registers: - */ - n = block->head; - while (n) { - ir3_instr_ra(ctx, n); - if (ctx->error) - return -1; - n = n->next; - } - - legalize(ctx, block); - - return 0; -} - -int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face, - bool *has_samp) -{ - struct ir3_ra_ctx ctx = { - .block = block, - .type = type, - .half_precision = half_precision, - .frag_coord = frag_coord, - .frag_face = frag_face, - }; - int ret; - - ir3_clear_mark(block->shader); - ret = block_ra(&ctx, block); - *has_samp = ctx.has_samp; - - return ret; -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c deleted file mode 100644 index 3ef67731926..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c +++ /dev/null @@ -1,401 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - - -#include "util/u_math.h" - -#include "ir3.h" - -enum { - SCHEDULED = -1, - DELAYED = -2, -}; - -/* - * Instruction Scheduling: - * - * Using the depth sorted list from depth pass, attempt to recursively - * schedule deepest unscheduled path. The first instruction that cannot - * be scheduled, returns the required delay slots it needs, at which - * point we return back up to the top and attempt to schedule by next - * highest depth. After a sufficient number of instructions have been - * scheduled, return back to beginning of list and start again. If you - * reach the end of depth sorted list without being able to insert any - * instruction, insert nop's. Repeat until no more unscheduled - * instructions. - * - * There are a few special cases that need to be handled, since sched - * is currently independent of register allocation. Usages of address - * register (a0.x) or predicate register (p0.x) must be serialized. Ie. - * if you have two pairs of instructions that write the same special - * register and then read it, then those pairs cannot be interleaved. - * To solve this, when we are in such a scheduling "critical section", - * and we encounter a conflicting write to a special register, we try - * to schedule any remaining instructions that use that value first. - */ - -struct ir3_sched_ctx { - struct ir3_instruction *scheduled; /* last scheduled instr */ - struct ir3_instruction *addr; /* current a0.x user, if any */ - struct ir3_instruction *pred; /* current p0.x user, if any */ - unsigned cnt; -}; - -static struct ir3_instruction * -deepest(struct ir3_instruction **srcs, unsigned nsrcs) -{ - struct ir3_instruction *d = NULL; - unsigned i = 0, id = 0; - - while ((i < nsrcs) && !(d = srcs[id = i])) - i++; - - if (!d) - return NULL; - - for (; i < nsrcs; i++) - if (srcs[i] && (srcs[i]->depth > d->depth)) - d = srcs[id = i]; - - srcs[id] = NULL; - - return d; -} - -static unsigned distance(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr, unsigned maxd) -{ - struct ir3_instruction *n = ctx->scheduled; - unsigned d = 0; - while (n && (n != instr) && (d < maxd)) { - if (is_alu(n) || is_flow(n)) - d++; - n = n->next; - } - return d; -} - -/* TODO maybe we want double linked list? */ -static struct ir3_instruction * prev(struct ir3_instruction *instr) -{ - struct ir3_instruction *p = instr->block->head; - while (p && (p->next != instr)) - p = p->next; - return p; -} - -static void schedule(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr, bool remove) -{ - struct ir3_block *block = instr->block; - - /* maybe there is a better way to handle this than just stuffing - * a nop.. ideally we'd know about this constraint in the - * scheduling and depth calculation.. - */ - if (ctx->scheduled && is_sfu(ctx->scheduled) && is_sfu(instr)) - schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false); - - /* remove from depth list: - */ - if (remove) { - struct ir3_instruction *p = prev(instr); - - /* NOTE: this can happen for inputs which are not - * read.. in that case there is no need to schedule - * the input, so just bail: - */ - if (instr != (p ? p->next : block->head)) - return; - - if (p) - p->next = instr->next; - else - block->head = instr->next; - } - - if (writes_addr(instr)) { - assert(ctx->addr == NULL); - ctx->addr = instr; - } - - if (writes_pred(instr)) { - assert(ctx->pred == NULL); - ctx->pred = instr; - } - - instr->flags |= IR3_INSTR_MARK; - - instr->next = ctx->scheduled; - ctx->scheduled = instr; - - ctx->cnt++; -} - -/* - * Delay-slot calculation. Follows fanin/fanout. - */ - -static unsigned delay_calc2(struct ir3_sched_ctx *ctx, - struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned srcn) -{ - unsigned delay = 0; - - if (is_meta(assigner)) { - unsigned i; - for (i = 1; i < assigner->regs_count; i++) { - struct ir3_register *reg = assigner->regs[i]; - if (reg->flags & IR3_REG_SSA) { - unsigned d = delay_calc2(ctx, reg->instr, - consumer, srcn); - delay = MAX2(delay, d); - } - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - delay -= distance(ctx, assigner, delay); - } - - return delay; -} - -static unsigned delay_calc(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) -{ - unsigned i, delay = 0; - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if (reg->flags & IR3_REG_SSA) { - unsigned d = delay_calc2(ctx, reg->instr, - instr, i - 1); - delay = MAX2(delay, d); - } - } - - return delay; -} - -/* A negative return value signals that an instruction has been newly - * scheduled, return back up to the top of the stack (to block_sched()) - */ -static int trysched(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) -{ - struct ir3_instruction *srcs[ARRAY_SIZE(instr->regs) - 1]; - struct ir3_instruction *src; - unsigned i, delay, nsrcs = 0; - - /* if already scheduled: */ - if (instr->flags & IR3_INSTR_MARK) - return 0; - - /* figure out our src's: */ - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if (reg->flags & IR3_REG_SSA) - srcs[nsrcs++] = reg->instr; - } - - /* for each src register in sorted order: - */ - delay = 0; - while ((src = deepest(srcs, nsrcs))) { - delay = trysched(ctx, src); - if (delay) - return delay; - } - - /* all our dependents are scheduled, figure out if - * we have enough delay slots to schedule ourself: - */ - delay = delay_calc(ctx, instr); - if (delay) - return delay; - - /* if this is a write to address/predicate register, and that - * register is currently in use, we need to defer until it is - * free: - */ - if (writes_addr(instr) && ctx->addr) { - assert(ctx->addr != instr); - return DELAYED; - } - if (writes_pred(instr) && ctx->pred) { - assert(ctx->pred != instr); - return DELAYED; - } - - schedule(ctx, instr, true); - return SCHEDULED; -} - -static struct ir3_instruction * reverse(struct ir3_instruction *instr) -{ - struct ir3_instruction *reversed = NULL; - while (instr) { - struct ir3_instruction *next = instr->next; - instr->next = reversed; - reversed = instr; - instr = next; - } - return reversed; -} - -static bool uses_current_addr(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) -{ - unsigned i; - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if (reg->flags & IR3_REG_SSA) { - if (is_addr(reg->instr)) { - struct ir3_instruction *addr; - addr = reg->instr->regs[1]->instr; /* the mova */ - if (ctx->addr == addr) - return true; - } - } - } - return false; -} - -static bool uses_current_pred(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr) -{ - unsigned i; - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if ((reg->flags & IR3_REG_SSA) && (ctx->pred == reg->instr)) - return true; - } - return false; -} - -/* when we encounter an instruction that writes to the address register - * when it is in use, we delay that instruction and try to schedule all - * other instructions using the current address register: - */ -static int block_sched_undelayed(struct ir3_sched_ctx *ctx, - struct ir3_block *block) -{ - struct ir3_instruction *instr = block->head; - bool addr_in_use = false; - bool pred_in_use = false; - unsigned cnt = ~0; - - while (instr) { - struct ir3_instruction *next = instr->next; - bool addr = uses_current_addr(ctx, instr); - bool pred = uses_current_pred(ctx, instr); - - if (addr || pred) { - int ret = trysched(ctx, instr); - if (ret == SCHEDULED) - cnt = 0; - else if (ret > 0) - cnt = MIN2(cnt, ret); - if (addr) - addr_in_use = true; - if (pred) - pred_in_use = true; - } - - instr = next; - } - - if (!addr_in_use) - ctx->addr = NULL; - - if (!pred_in_use) - ctx->pred = NULL; - - return cnt; -} - -static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) -{ - struct ir3_instruction *instr; - - /* schedule all the shader input's (meta-instr) first so that - * the RA step sees that the input registers contain a value - * from the start of the shader: - */ - if (!block->parent) { - unsigned i; - for (i = 0; i < block->ninputs; i++) { - struct ir3_instruction *in = block->inputs[i]; - if (in) - schedule(ctx, in, true); - } - } - - while ((instr = block->head)) { - /* NOTE: always grab next *before* trysched(), in case the - * instruction is actually scheduled (and therefore moved - * from depth list into scheduled list) - */ - struct ir3_instruction *next = instr->next; - int cnt = trysched(ctx, instr); - - if (cnt == DELAYED) - cnt = block_sched_undelayed(ctx, block); - - /* -1 is signal to return up stack, but to us means same as 0: */ - cnt = MAX2(0, cnt); - cnt += ctx->cnt; - instr = next; - - /* if deepest remaining instruction cannot be scheduled, try - * the increasingly more shallow instructions until needed - * number of delay slots is filled: - */ - while (instr && (cnt > ctx->cnt)) { - next = instr->next; - trysched(ctx, instr); - instr = next; - } - - /* and if we run out of instructions that can be scheduled, - * then it is time for nop's: - */ - while (cnt > ctx->cnt) - schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false); - } - - /* at this point, scheduled list is in reverse order, so fix that: */ - block->head = reverse(ctx->scheduled); -} - -void ir3_block_sched(struct ir3_block *block) -{ - struct ir3_sched_ctx ctx = {0}; - ir3_clear_mark(block->shader); - block_sched(&ctx, block); -} diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_visitor.h b/src/gallium/drivers/freedreno/a3xx/ir3_visitor.h deleted file mode 100644 index 1c60d1620ca..00000000000 --- a/src/gallium/drivers/freedreno/a3xx/ir3_visitor.h +++ /dev/null @@ -1,154 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#ifndef IR3_VISITOR_H_ -#define IR3_VISITOR_H_ - -/** - * Visitor which follows dst to src relationships between instructions, - * first visiting the dst (writer) instruction, followed by src (reader) - * instruction(s). - * - * TODO maybe we want multiple different visitors to walk the - * graph in different ways? - */ - -struct ir3_visitor; - -typedef void (*ir3_visit_instr_func)(struct ir3_visitor *v, - struct ir3_instruction *instr); - -typedef void (*ir3_visit_reg_func)(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg); - -struct ir3_visitor_funcs { - ir3_visit_instr_func instr; // TODO do we need?? - - ir3_visit_reg_func dst_shader_input; - ir3_visit_reg_func dst_block_input; - ir3_visit_reg_func dst_fanout; - ir3_visit_reg_func dst_fanin; - ir3_visit_reg_func dst; - - ir3_visit_reg_func src_block_input; - ir3_visit_reg_func src_fanout; - ir3_visit_reg_func src_fanin; - ir3_visit_reg_func src; -}; - -struct ir3_visitor { - const struct ir3_visitor_funcs *funcs; - bool error; -}; - -#include "util/u_debug.h" - -static void visit_instr_dst(struct ir3_visitor *v, - struct ir3_instruction *instr) -{ - struct ir3_register *reg = instr->regs[0]; - - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - if (instr->regs_count == 1) - v->funcs->dst_shader_input(v, instr, reg); - else - v->funcs->dst_block_input(v, instr, reg); - return; - case OPC_META_FO: - v->funcs->dst_fanout(v, instr, reg); - return; - case OPC_META_FI: - v->funcs->dst_fanin(v, instr, reg); - return; - default: - break; - - } - } - - v->funcs->dst(v, instr, reg); -} - -static void visit_instr_src(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - /* shader-input does not have a src, only block input: */ - debug_assert(instr->regs_count == 2); - v->funcs->src_block_input(v, instr, reg); - return; - case OPC_META_FO: - v->funcs->src_fanout(v, instr, reg); - return; - case OPC_META_FI: - v->funcs->src_fanin(v, instr, reg); - return; - default: - break; - - } - } - - v->funcs->src(v, instr, reg); -} - -static void ir3_visit_instr(struct ir3_visitor *v, - struct ir3_instruction *instr) -{ - struct ir3_instruction *n; - - /* visit instruction that assigns value: */ - if (instr->regs_count > 0) - visit_instr_dst(v, instr); - - /* and of any following instructions which read that value: */ - n = instr->next; - while (n && !v->error) { - unsigned i; - - for (i = 1; i < n->regs_count; i++) { - struct ir3_register *reg = n->regs[i]; - if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr)) - visit_instr_src(v, n, reg); - } - - n = n->next; - } -} - -static void ir3_visit_reg(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - /* no-op */ -} - -#endif /* IR3_VISITOR_H_ */ |