diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc.c | 713 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc.h | 637 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_emit.c | 1026 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 1434 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_print.c | 381 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 1053 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2026 |
7 files changed, 0 insertions, 7270 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c deleted file mode 100644 index 2c3b8555f32..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ /dev/null @@ -1,713 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "nvc0_pc.h" -#include "nvc0_program.h" - -uint8_t -nvc0_ir_reverse_cc(uint8_t cc) -{ - static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; - - return cc_swapped[cc & 7] | (cc & ~7); -} - -boolean -nvc0_insn_can_load(struct nv_instruction *nvi, int s, - struct nv_instruction *ld) -{ - int i; - - if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) { - if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s))) - return FALSE; - if (!(nvc0_op_info_table[nvi->opcode].immediate & 4)) - if (ld->src[0]->value->reg.imm.u32 & 0xfff) - return FALSE; - } else - if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s))) - return FALSE; - - if (ld->indirect >= 0) - return FALSE; - - /* a few ops can use g[] sources directly, but we don't support g[] yet */ - if (ld->src[0]->value->reg.file == NV_FILE_MEM_L || - ld->src[0]->value->reg.file == NV_FILE_MEM_G) - return FALSE; - - for (i = 0; i < 3 && nvi->src[i]; ++i) - if (nvi->src[i]->value->reg.file == NV_FILE_IMM) - return FALSE; - - return TRUE; -} - -/* Return whether this instruction can be executed conditionally. */ -boolean -nvc0_insn_is_predicateable(struct nv_instruction *nvi) -{ - if (nvi->predicate >= 0) /* already predicated */ - return FALSE; - if (!nvc0_op_info_table[nvi->opcode].predicate && - !nvc0_op_info_table[nvi->opcode].pseudo) - return FALSE; - return TRUE; -} - -int -nvc0_insn_refcount(struct nv_instruction *nvi) -{ - int rc = 0; - int i; - for (i = 0; i < 5 && nvi->def[i]; ++i) { - if (!nvi->def[i]) - return rc; - rc += nvi->def[i]->refc; - } - return rc; -} - -int -nvc0_pc_replace_value(struct nv_pc *pc, - struct nv_value *old_val, - struct nv_value *new_val) -{ - int i, n, s; - - if (old_val == new_val) - return old_val->refc; - - for (i = 0, n = 0; i < pc->num_refs; ++i) { - if (pc->refs[i]->value == old_val) { - ++n; - for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) - if (pc->refs[i]->insn->src[s] == pc->refs[i]) - break; - assert(s < 6); - nv_reference(pc, pc->refs[i]->insn, s, new_val); - } - } - return n; -} - -static INLINE boolean -is_gpr63(struct nv_value *val) -{ - return (val->reg.file == NV_FILE_GPR && val->reg.id == 63); -} - -struct nv_value * -nvc0_pc_find_constant(struct nv_ref *ref) -{ - struct nv_value *src; - - if (!ref) - return NULL; - - src = ref->value; - while (src->insn && src->insn->opcode == NV_OP_MOV) { - assert(!src->insn->src[0]->mod); - src = src->insn->src[0]->value; - } - if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) || - (src->insn && - src->insn->opcode == NV_OP_LD && - src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && - src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) - return src; - return NULL; -} - -struct nv_value * -nvc0_pc_find_immediate(struct nv_ref *ref) -{ - struct nv_value *src = nvc0_pc_find_constant(ref); - - return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL; -} - -static void -nv_pc_free_refs(struct nv_pc *pc) -{ - int i; - for (i = 0; i < pc->num_refs; i += 64) - FREE(pc->refs[i]); - FREE(pc->refs); -} - -static const char * -edge_name(ubyte type) -{ - switch (type) { - case CFG_EDGE_FORWARD: return "forward"; - case CFG_EDGE_BACK: return "back"; - case CFG_EDGE_LOOP_ENTER: return "loop"; - case CFG_EDGE_LOOP_LEAVE: return "break"; - case CFG_EDGE_FAKE: return "fake"; - default: - return "?"; - } -} - -void -nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, - void *priv) -{ - struct nv_basic_block *bb[64], *bbb[16], *b; - int j, p, pp; - - bb[0] = root; - p = 1; - pp = 0; - - while (p > 0) { - b = bb[--p]; - b->priv = 0; - - for (j = 1; j >= 0; --j) { - if (!b->out[j]) - continue; - - switch (b->out_kind[j]) { - case CFG_EDGE_BACK: - continue; - case CFG_EDGE_FORWARD: - case CFG_EDGE_FAKE: - if (++b->out[j]->priv == b->out[j]->num_in) - bb[p++] = b->out[j]; - break; - case CFG_EDGE_LOOP_ENTER: - bb[p++] = b->out[j]; - break; - case CFG_EDGE_LOOP_LEAVE: - if (!b->out[j]->priv) { - bbb[pp++] = b->out[j]; - b->out[j]->priv = 1; - } - break; - default: - assert(0); - break; - } - } - - f(priv, b); - - if (!p) { - p = pp; - for (; pp > 0; --pp) - bb[pp - 1] = bbb[pp - 1]; - } - } -} - -static void -nv_do_print_function(void *priv, struct nv_basic_block *b) -{ - struct nv_instruction *i; - - debug_printf("=== BB %i ", b->id); - if (b->out[0]) - debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); - if (b->out[1]) - debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); - debug_printf("===\n"); - - i = b->phi; - if (!i) - i = b->entry; - for (; i; i = i->next) - nvc0_print_instruction(i); -} - -void -nvc0_print_function(struct nv_basic_block *root) -{ - if (root->subroutine) - debug_printf("SUBROUTINE %i\n", root->subroutine); - else - debug_printf("MAIN\n"); - - nvc0_pc_pass_in_order(root, nv_do_print_function, root); -} - -void -nvc0_print_program(struct nv_pc *pc) -{ - int i; - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i]) - nvc0_print_function(pc->root[i]); -} - -#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW -static void -nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) -{ - int i; - - b->pass_seq = pc->pass_seq; - - fprintf(f, "\t%i [shape=box]\n", b->id); - - for (i = 0; i < 2; ++i) { - if (!b->out[i]) - continue; - switch (b->out_kind[i]) { - case CFG_EDGE_FORWARD: - fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); - break; - case CFG_EDGE_LOOP_ENTER: - fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); - break; - case CFG_EDGE_LOOP_LEAVE: - fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); - break; - case CFG_EDGE_BACK: - fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); - continue; - case CFG_EDGE_FAKE: - fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); - break; - default: - assert(0); - break; - } - if (b->out[i]->pass_seq < pc->pass_seq) - nv_do_print_cfgraph(pc, f, b->out[i]); - } -} - -/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */ -static void -nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) -{ - FILE *f; - - f = fopen(filepath, "a"); - if (!f) - return; - - fprintf(f, "digraph G {\n"); - - ++pc->pass_seq; - - nv_do_print_cfgraph(pc, f, pc->root[subr]); - - fprintf(f, "}\n"); - - fclose(f); -} -#endif - -static INLINE void -nvc0_pc_print_binary(struct nv_pc *pc) -{ - unsigned i; - - NV50_DBGMSG(SHADER, "nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8); - - for (i = 0; i < pc->emit_size / 4; i += 2) { - debug_printf("0x%08x ", pc->emit[i + 0]); - debug_printf("0x%08x ", pc->emit[i + 1]); - if ((i % 16) == 15) - debug_printf("\n"); - } - debug_printf("\n"); -} - -static int -nvc0_emit_program(struct nv_pc *pc) -{ - uint32_t *code = pc->emit; - int n; - - NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->emit_size); - - pc->emit_pos = 0; - for (n = 0; n < pc->num_blocks; ++n) { - struct nv_instruction *i; - struct nv_basic_block *b = pc->bb_list[n]; - - for (i = b->entry; i; i = i->next) { - nvc0_emit_instruction(pc, i); - pc->emit += 2; - pc->emit_pos += 8; - } - } - assert(pc->emit == &code[pc->emit_size / 4]); - - pc->emit[0] = 0x00001de7; - pc->emit[1] = 0x80000000; - pc->emit_size += 8; - - pc->emit = code; - -#if NV50_DEBUG & NV50_DEBUG_SHADER - nvc0_pc_print_binary(pc); -#endif - - return 0; -} - -int -nvc0_generate_code(struct nvc0_translation_info *ti) -{ - struct nv_pc *pc; - int ret; - int i; - - pc = CALLOC_STRUCT(nv_pc); - if (!pc) - return 1; - - pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT; - - pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0])); - if (!pc->root) { - FREE(pc); - return 1; - } - pc->num_subroutines = ti->num_subrs; - - ret = nvc0_tgsi_to_nc(pc, ti); - if (ret) - goto out; -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - nvc0_print_program(pc); -#endif - - pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE; - - /* optimization */ - ret = nvc0_pc_exec_pass0(pc); - if (ret) - goto out; -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - nvc0_print_program(pc); -#endif - - /* register allocation */ - ret = nvc0_pc_exec_pass1(pc); - if (ret) - goto out; -#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW - nvc0_print_program(pc); - nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); -#endif - - /* prepare for emission */ - ret = nvc0_pc_exec_pass2(pc); - if (ret) - goto out; - assert(!(pc->emit_size % 8)); - - pc->emit = CALLOC(pc->emit_size / 4 + 2, 4); - if (!pc->emit) { - ret = 3; - goto out; - } - ret = nvc0_emit_program(pc); - if (ret) - goto out; - - ti->prog->code = pc->emit; - ti->prog->code_base = 0; - ti->prog->code_size = pc->emit_size; - ti->prog->parm_size = 0; - - ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1); - - ti->prog->relocs = pc->reloc_entries; - ti->prog->num_relocs = pc->num_relocs; - - NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success"); - -out: - nv_pc_free_refs(pc); - - for (i = 0; i < pc->num_blocks; ++i) - FREE(pc->bb_list[i]); - if (pc->root) - FREE(pc->root); - if (ret) { - /* on success, these will be referenced by struct nvc0_program */ - if (pc->emit) - FREE(pc->emit); - if (pc->immd_buf) - FREE(pc->immd_buf); - if (pc->reloc_entries) - FREE(pc->reloc_entries); - } - FREE(pc); - return ret; -} - -static void -nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i) -{ - if (!b->phi) { - i->prev = NULL; - b->phi = i; - i->next = b->entry; - if (b->entry) { - assert(!b->entry->prev && b->exit); - b->entry->prev = i; - } else { - b->entry = i; - b->exit = i; - } - } else { - assert(b->entry); - if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */ - assert(b->entry == b->exit); - b->entry->next = i; - i->prev = b->entry; - b->entry = i; - b->exit = i; - } else { /* insert before entry */ - assert(b->entry->prev && b->exit); - i->next = b->entry; - i->prev = b->entry->prev; - b->entry->prev = i; - i->prev->next = i; - } - } -} - -void -nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) -{ - if (i->opcode == NV_OP_PHI) { - nvbb_insert_phi(b, i); - } else { - i->prev = b->exit; - if (b->exit) - b->exit->next = i; - b->exit = i; - if (!b->entry) - b->entry = i; - else - if (i->prev && i->prev->opcode == NV_OP_PHI) - b->entry = i; - } - - i->bb = b; - b->num_instructions++; - - if (i->prev && i->prev->terminator) - nvc0_insns_permute(i->prev, i); -} - -void -nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) -{ - if (!at->next) { - nvc0_insn_append(at->bb, ni); - return; - } - ni->next = at->next; - ni->prev = at; - ni->next->prev = ni; - ni->prev->next = ni; - ni->bb = at->bb; - ni->bb->num_instructions++; -} - -void -nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni) -{ - nvc0_insn_insert_after(at, ni); - nvc0_insns_permute(at, ni); -} - -void -nvc0_insn_delete(struct nv_instruction *nvi) -{ - struct nv_basic_block *b = nvi->bb; - int s; - - /* debug_printf("REM: "); nv_print_instruction(nvi); */ - - for (s = 0; s < 6 && nvi->src[s]; ++s) - nv_reference(NULL, nvi, s, NULL); - - if (nvi->next) - nvi->next->prev = nvi->prev; - else { - assert(nvi == b->exit); - b->exit = nvi->prev; - } - - if (nvi->prev) - nvi->prev->next = nvi->next; - - if (nvi == b->entry) { - /* PHIs don't get hooked to b->entry */ - b->entry = nvi->next; - assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); - } - - if (nvi == b->phi) { - if (nvi->opcode != NV_OP_PHI) - NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n"); - - assert(!nvi->prev); - if (!nvi->next || nvi->next->opcode != NV_OP_PHI) - b->phi = NULL; - else - b->phi = nvi->next; - } -} - -void -nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2) -{ - struct nv_basic_block *b = i1->bb; - - assert(i1->opcode != NV_OP_PHI && - i2->opcode != NV_OP_PHI); - assert(i1->next == i2); - - if (b->exit == i2) - b->exit = i1; - - if (b->entry == i1) - b->entry = i2; - - i2->prev = i1->prev; - i1->next = i2->next; - i2->next = i1; - i1->prev = i2; - - if (i2->prev) - i2->prev->next = i2; - if (i1->next) - i1->next->prev = i1; -} - -void -nvc0_bblock_attach(struct nv_basic_block *parent, - struct nv_basic_block *b, ubyte edge_kind) -{ - assert(b->num_in < 8); - - if (parent->out[0]) { - assert(!parent->out[1]); - parent->out[1] = b; - parent->out_kind[1] = edge_kind; - } else { - parent->out[0] = b; - parent->out_kind[0] = edge_kind; - } - - b->in[b->num_in] = parent; - b->in_kind[b->num_in++] = edge_kind; -} - -/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ - -boolean -nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) -{ - int j; - - if (b == d) - return TRUE; - - for (j = 0; j < b->num_in; ++j) - if ((b->in_kind[j] != CFG_EDGE_BACK) && - !nvc0_bblock_dominated_by(b->in[j], d)) - return FALSE; - - return j ? TRUE : FALSE; -} - -/* check if @bf (future) can be reached from @bp (past), stop at @bt */ -boolean -nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, - struct nv_basic_block *bt) -{ - struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; - int i, p, n; - - p = 0; - n = 1; - q[0] = bp; - - while (p < n) { - b = q[p++]; - - if (b == bf) - break; - if (b == bt) - continue; - assert(n <= (1024 - 2)); - - for (i = 0; i < 2; ++i) { - if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { - q[n] = b->out[i]; - q[n++]->priv = 1; - } - } - } - for (--n; n >= 0; --n) - q[n]->priv = 0; - - return (b == bf); -} - -static struct nv_basic_block * -nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) -{ - struct nv_basic_block *out; - int i; - - if (!nvc0_bblock_dominated_by(df, b)) { - for (i = 0; i < df->num_in; ++i) { - if (df->in_kind[i] == CFG_EDGE_BACK) - continue; - if (nvc0_bblock_dominated_by(df->in[i], b)) - return df; - } - } - for (i = 0; i < 2 && df->out[i]; ++i) { - if (df->out_kind[i] == CFG_EDGE_BACK) - continue; - if ((out = nvbb_find_dom_frontier(b, df->out[i]))) - return out; - } - return NULL; -} - -struct nv_basic_block * -nvc0_bblock_dom_frontier(struct nv_basic_block *b) -{ - struct nv_basic_block *df; - int i; - - for (i = 0; i < 2 && b->out[i]; ++i) - if ((df = nvbb_find_dom_frontier(b, b->out[i]))) - return df; - return NULL; -} diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h deleted file mode 100644 index 441692d766c..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ /dev/null @@ -1,637 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NVC0_COMPILER_H__ -#define __NVC0_COMPILER_H__ - -#include "nv50/nv50_debug.h" - -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_double_list.h" - -/* pseudo opcodes */ -#define NV_OP_UNDEF 0 -#define NV_OP_BIND 1 -#define NV_OP_MERGE 2 -#define NV_OP_PHI 3 -#define NV_OP_SELECT 4 -#define NV_OP_NOP 5 - -/** - * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX). - * Beware conflicts ! - * SELECT forces its multiple source operands and its destination operand into - * one and the same register. - */ - -/* base opcodes */ -#define NV_OP_LD 6 -#define NV_OP_ST 7 -#define NV_OP_MOV 8 -#define NV_OP_AND 9 -#define NV_OP_OR 10 -#define NV_OP_XOR 11 -#define NV_OP_SHL 12 -#define NV_OP_SHR 13 -#define NV_OP_NOT 14 -#define NV_OP_SET 15 -#define NV_OP_ADD 16 -#define NV_OP_SUB 17 -#define NV_OP_MUL 18 -#define NV_OP_MAD 19 -#define NV_OP_ABS 20 -#define NV_OP_NEG 21 -#define NV_OP_MAX 22 -#define NV_OP_MIN 23 -#define NV_OP_CVT 24 -#define NV_OP_CEIL 25 -#define NV_OP_FLOOR 26 -#define NV_OP_TRUNC 27 -#define NV_OP_SAD 28 - -/* shader opcodes */ -#define NV_OP_VFETCH 29 -#define NV_OP_PFETCH 30 -#define NV_OP_EXPORT 31 -#define NV_OP_LINTERP 32 -#define NV_OP_PINTERP 33 -#define NV_OP_EMIT 34 -#define NV_OP_RESTART 35 -#define NV_OP_TEX 36 -#define NV_OP_TXB 37 -#define NV_OP_TXL 38 -#define NV_OP_TXF 39 -#define NV_OP_TXQ 40 -#define NV_OP_QUADOP 41 -#define NV_OP_DFDX 42 -#define NV_OP_DFDY 43 -#define NV_OP_KIL 44 - -/* control flow opcodes */ -#define NV_OP_BRA 45 -#define NV_OP_CALL 46 -#define NV_OP_RET 47 -#define NV_OP_EXIT 48 -#define NV_OP_BREAK 49 -#define NV_OP_BREAKADDR 50 -#define NV_OP_JOINAT 51 -#define NV_OP_JOIN 52 - -/* typed opcodes */ -#define NV_OP_ADD_F32 NV_OP_ADD -#define NV_OP_ADD_B32 53 -#define NV_OP_MUL_F32 NV_OP_MUL -#define NV_OP_MUL_B32 54 -#define NV_OP_ABS_F32 NV_OP_ABS -#define NV_OP_ABS_S32 55 -#define NV_OP_NEG_F32 NV_OP_NEG -#define NV_OP_NEG_S32 56 -#define NV_OP_MAX_F32 NV_OP_MAX -#define NV_OP_MAX_S32 57 -#define NV_OP_MAX_U32 58 -#define NV_OP_MIN_F32 NV_OP_MIN -#define NV_OP_MIN_S32 59 -#define NV_OP_MIN_U32 60 -#define NV_OP_SET_F32 61 -#define NV_OP_SET_S32 62 -#define NV_OP_SET_U32 63 -#define NV_OP_SAR 64 -#define NV_OP_RCP 65 -#define NV_OP_RSQ 66 -#define NV_OP_LG2 67 -#define NV_OP_SIN 68 -#define NV_OP_COS 69 -#define NV_OP_EX2 70 -#define NV_OP_PRESIN 71 -#define NV_OP_PREEX2 72 -#define NV_OP_SAT 73 - -/* newly added opcodes */ -#define NV_OP_SET_F32_AND 74 -#define NV_OP_SET_F32_OR 75 -#define NV_OP_SET_F32_XOR 76 -#define NV_OP_SELP 77 -#define NV_OP_SLCT 78 -#define NV_OP_SLCT_F32 NV_OP_SLCT -#define NV_OP_SLCT_S32 79 -#define NV_OP_SLCT_U32 80 -#define NV_OP_SUB_F32 NV_OP_SUB -#define NV_OP_SUB_S32 81 -#define NV_OP_MAD_F32 NV_OP_MAD -#define NV_OP_FSET_F32 82 -#define NV_OP_TXG 83 - -#define NV_OP_COUNT 84 - -/* nv50 files omitted */ -#define NV_FILE_GPR 0 -#define NV_FILE_COND 1 -#define NV_FILE_PRED 2 -#define NV_FILE_IMM 16 -#define NV_FILE_MEM_S 32 -#define NV_FILE_MEM_V 34 -#define NV_FILE_MEM_A 35 -#define NV_FILE_MEM_L 48 -#define NV_FILE_MEM_G 64 -#define NV_FILE_MEM_C(i) (80 + i) - -#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S) - -#define NV_MOD_NEG 1 -#define NV_MOD_ABS 2 -#define NV_MOD_NOT 4 -#define NV_MOD_SAT 8 - -#define NV_TYPE_U8 0x00 -#define NV_TYPE_S8 0x01 -#define NV_TYPE_U16 0x02 -#define NV_TYPE_S16 0x03 -#define NV_TYPE_U32 0x04 -#define NV_TYPE_S32 0x05 -#define NV_TYPE_P32 0x07 -#define NV_TYPE_F32 0x09 -#define NV_TYPE_F64 0x0b -#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4)) -#define NV_TYPE_ANY 0xff - -#define NV_TYPE_ISINT(t) ((t) < 7) -#define NV_TYPE_ISSGD(t) ((t) & 1) - -#define NV_CC_FL 0x0 -#define NV_CC_LT 0x1 -#define NV_CC_EQ 0x2 -#define NV_CC_LE 0x3 -#define NV_CC_GT 0x4 -#define NV_CC_NE 0x5 -#define NV_CC_GE 0x6 -#define NV_CC_U 0x8 -#define NV_CC_TR 0xf -#define NV_CC_O 0x10 -#define NV_CC_C 0x11 -#define NV_CC_A 0x12 -#define NV_CC_S 0x13 -#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) -/* for 1 bit predicates: */ -#define NV_CC_P 0 -#define NV_CC_NOT_P 1 - -uint8_t nvc0_ir_reverse_cc(uint8_t cc); - -#define NV_PC_MAX_INSTRUCTIONS 2048 -#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) - -#define NV_PC_MAX_BASIC_BLOCKS 1024 - -struct nv_op_info { - uint base; /* e.g. ADD_S32 -> ADD */ - char name[12]; - uint8_t type; - uint16_t mods; - unsigned flow : 1; - unsigned commutative : 1; - unsigned vector : 1; - unsigned predicate : 1; - unsigned pseudo : 1; - unsigned immediate : 3; - unsigned memory : 3; -}; - -extern struct nv_op_info nvc0_op_info_table[]; - -#define NV_BASEOP(op) (nvc0_op_info_table[op].base) -#define NV_OPTYPE(op) (nvc0_op_info_table[op].type) - -static INLINE boolean -nv_is_texture_op(uint opcode) -{ - return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); -} - -static INLINE boolean -nv_is_vector_op(uint opcode) -{ - return nvc0_op_info_table[opcode].vector ? TRUE : FALSE; -} - -static INLINE boolean -nv_op_commutative(uint opcode) -{ - return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE; -} - -static INLINE uint8_t -nv_op_supported_src_mods(uint opcode, int s) -{ - return (nvc0_op_info_table[opcode].mods >> (s * 4)) & 0xf; -} - -static INLINE uint -nv_type_order(ubyte type) -{ - switch (type & 0xf) { - case NV_TYPE_U8: - case NV_TYPE_S8: - return 0; - case NV_TYPE_U16: - case NV_TYPE_S16: - return 1; - case NV_TYPE_U32: - case NV_TYPE_F32: - case NV_TYPE_S32: - case NV_TYPE_P32: - return 2; - case NV_TYPE_F64: - return 3; - } - assert(0); - return 0; -} - -static INLINE uint -nv_type_sizeof(ubyte type) -{ - if (type & 0xf0) - return (1 << nv_type_order(type)) * (type >> 4); - return 1 << nv_type_order(type); -} - -static INLINE uint -nv_type_sizeof_base(ubyte type) -{ - return 1 << nv_type_order(type); -} - -struct nv_reg { - uint32_t address; /* for memory locations */ - int id; /* for registers */ - ubyte file; - ubyte size; - union { - int32_t s32; - int64_t s64; - uint64_t u64; - uint32_t u32; /* expected to be 0 for $r63 */ - float f32; - double f64; - } imm; -}; - -struct nv_range { - struct nv_range *next; - int bgn; - int end; -}; - -struct nv_ref; - -struct nv_value { - struct nv_reg reg; - struct nv_instruction *insn; - struct nv_value *join; - struct nv_ref *last_use; - int n; - struct nv_range *livei; - int refc; - struct nv_value *next; - struct nv_value *prev; -}; - -struct nv_ref { - struct nv_value *value; - struct nv_instruction *insn; - struct list_head list; /* connects uses of the same value */ - uint8_t mod; - uint8_t flags; -}; - -#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) - -struct nv_basic_block; - -struct nv_instruction { - struct nv_instruction *next; - struct nv_instruction *prev; - uint opcode; - uint serial; - - struct nv_value *def[5]; - struct nv_ref *src[6]; - - int8_t predicate; /* index of predicate src */ - int8_t indirect; /* index of pointer src */ - - union { - struct { - uint8_t t; /* TIC binding */ - uint8_t s; /* TSC binding */ - } tex; - struct { - uint8_t d; /* output type */ - uint8_t s; /* input type */ - } cvt; - } ext; - - struct nv_basic_block *bb; - struct nv_basic_block *target; /* target block of control flow insn */ - - unsigned cc : 5; /* condition code */ - unsigned fixed : 1; /* don't optimize away (prematurely) */ - unsigned terminator : 1; - unsigned join : 1; - unsigned set_cond : 4; /* 2nd byte */ - unsigned saturate : 1; - unsigned centroid : 1; - unsigned flat : 1; - unsigned patch : 1; - unsigned lanes : 4; /* 3rd byte */ - unsigned tex_dim : 2; - unsigned tex_array : 1; - unsigned tex_cube : 1; - unsigned tex_shadow : 1; /* 4th byte */ - unsigned tex_live : 1; - unsigned tex_mask : 4; - - uint8_t quadop; -}; - -static INLINE int -nvi_vector_size(struct nv_instruction *nvi) -{ - int i; - assert(nvi); - for (i = 0; i < 5 && nvi->def[i]; ++i); - return i; -} - -#define CFG_EDGE_FORWARD 0 -#define CFG_EDGE_BACK 1 -#define CFG_EDGE_LOOP_ENTER 2 -#define CFG_EDGE_LOOP_LEAVE 4 -#define CFG_EDGE_FAKE 8 - -/* 'WALL' edge means where reachability check doesn't follow */ -/* 'LOOP' edge means just having to do with loops */ -#define IS_LOOP_EDGE(k) ((k) & 7) -#define IS_WALL_EDGE(k) ((k) & 9) - -struct nv_basic_block { - struct nv_instruction *entry; /* first non-phi instruction */ - struct nv_instruction *exit; - struct nv_instruction *phi; /* very first instruction */ - int num_instructions; - - struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ - struct nv_basic_block *in[8]; /* hope that suffices */ - uint num_in; - ubyte out_kind[2]; - ubyte in_kind[8]; - - int id; - int subroutine; - uint priv; /* reset to 0 after you're done */ - uint pass_seq; - - uint32_t emit_pos; /* position, size in emitted code (in bytes) */ - uint32_t emit_size; - - uint32_t live_set[NV_PC_MAX_VALUES / 32]; -}; - -struct nvc0_translation_info; - -struct nv_pc { - struct nv_basic_block **root; - struct nv_basic_block *current_block; - struct nv_basic_block *parent_block; - - int loop_nesting_bound; - uint pass_seq; - - struct nv_value values[NV_PC_MAX_VALUES]; - struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; - struct nv_ref **refs; - struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; - int num_values; - int num_instructions; - int num_refs; - int num_blocks; - int num_subroutines; - - int max_reg[4]; - - uint32_t *immd_buf; /* populated on emit */ - unsigned immd_count; - - uint32_t *emit; - uint32_t emit_size; - uint32_t emit_pos; - - void *reloc_entries; - unsigned num_relocs; - - /* optimization enables */ - boolean opt_reload_elim; - boolean is_fragprog; -}; - -void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *); -void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *); -void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *); - -static INLINE struct nv_instruction * -nv_alloc_instruction(struct nv_pc *pc, uint opcode) -{ - struct nv_instruction *insn; - - insn = &pc->instructions[pc->num_instructions++]; - assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); - - insn->opcode = opcode; - insn->cc = NV_CC_P; - insn->indirect = -1; - insn->predicate = -1; - - return insn; -} - -static INLINE struct nv_instruction * -new_instruction(struct nv_pc *pc, uint opcode) -{ - struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); - - nvc0_insn_append(pc->current_block, insn); - return insn; -} - -static INLINE struct nv_instruction * -new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) -{ - struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); - - nvc0_insn_insert_after(at, insn); - return insn; -} - -static INLINE struct nv_value * -new_value(struct nv_pc *pc, ubyte file, ubyte size) -{ - struct nv_value *value = &pc->values[pc->num_values]; - - assert(pc->num_values < NV_PC_MAX_VALUES - 1); - - value->n = pc->num_values++; - value->join = value; - value->reg.id = -1; - value->reg.file = file; - value->reg.size = size; - return value; -} - -static INLINE struct nv_value * -new_value_like(struct nv_pc *pc, struct nv_value *like) -{ - return new_value(pc, like->reg.file, like->reg.size); -} - -static INLINE struct nv_ref * -new_ref(struct nv_pc *pc, struct nv_value *val) -{ - int i; - struct nv_ref *ref; - - if ((pc->num_refs % 64) == 0) { - const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); - const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); - - pc->refs = REALLOC(pc->refs, old_size, new_size); - - ref = CALLOC(64, sizeof(struct nv_ref)); - for (i = 0; i < 64; ++i) - pc->refs[pc->num_refs + i] = &ref[i]; - } - - ref = pc->refs[pc->num_refs++]; - ref->value = val; - - LIST_INITHEAD(&ref->list); - - ++val->refc; - return ref; -} - -static INLINE struct nv_basic_block * -new_basic_block(struct nv_pc *pc) -{ - struct nv_basic_block *bb; - - if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) - return NULL; - - bb = CALLOC_STRUCT(nv_basic_block); - - bb->id = pc->num_blocks; - pc->bb_list[pc->num_blocks++] = bb; - return bb; -} - -static INLINE void -nv_reference(struct nv_pc *pc, - struct nv_instruction *nvi, int c, struct nv_value *s) -{ - struct nv_ref **d = &nvi->src[c]; - assert(c < 6); - - if (*d) { - --(*d)->value->refc; - LIST_DEL(&(*d)->list); - } - - if (s) { - if (!*d) { - *d = new_ref(pc, s); - (*d)->insn = nvi; - } else { - LIST_DEL(&(*d)->list); - (*d)->value = s; - ++(s->refc); - } - if (!s->last_use) - s->last_use = *d; - else - LIST_ADDTAIL(&s->last_use->list, &(*d)->list); - - s->last_use = *d; - (*d)->insn = nvi; - } else { - *d = NULL; - } -} - -/* nvc0_emit.c */ -void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *); - -/* nvc0_print.c */ -const char *nvc0_opcode_name(uint opcode); -void nvc0_print_instruction(struct nv_instruction *); - -/* nvc0_pc.c */ -void nvc0_print_function(struct nv_basic_block *root); -void nvc0_print_program(struct nv_pc *); - -boolean nvc0_insn_can_load(struct nv_instruction *, int s, - struct nv_instruction *); -boolean nvc0_insn_is_predicateable(struct nv_instruction *); - -int nvc0_insn_refcount(struct nv_instruction *); -void nvc0_insn_delete(struct nv_instruction *); -void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *); - -void nvc0_bblock_attach(struct nv_basic_block *parent, - struct nv_basic_block *child, ubyte edge_kind); -boolean nvc0_bblock_dominated_by(struct nv_basic_block *, - struct nv_basic_block *); -boolean nvc0_bblock_reachable_by(struct nv_basic_block *future, - struct nv_basic_block *past, - struct nv_basic_block *final); -struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *); - -int nvc0_pc_replace_value(struct nv_pc *pc, - struct nv_value *old_val, - struct nv_value *new_val); - -struct nv_value *nvc0_pc_find_immediate(struct nv_ref *); -struct nv_value *nvc0_pc_find_constant(struct nv_ref *); - -typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); - -void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); - -int nvc0_pc_exec_pass0(struct nv_pc *pc); -int nvc0_pc_exec_pass1(struct nv_pc *pc); -int nvc0_pc_exec_pass2(struct nv_pc *pc); - -int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *); - -#endif // NV50_COMPILER_H diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c deleted file mode 100644 index e35653280a1..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ /dev/null @@ -1,1026 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "nvc0_pc.h" -#include "nvc0_program.h" - -#define NVC0_FIXUP_CODE_RELOC 0 -#define NVC0_FIXUP_DATA_RELOC 1 - -struct nvc0_fixup { - uint8_t type; - int8_t shift; - uint32_t mask; - uint32_t data; - uint32_t ofst; -}; - -void -nvc0_relocate_program(struct nvc0_program *prog, - uint32_t code_base, - uint32_t data_base) -{ - struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs; - unsigned i; - - for (i = 0; i < prog->num_relocs; ++i) { - uint32_t data; - - switch (f[i].type) { - case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break; - case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break; - default: - data = f[i].data; - break; - } - data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift); - - prog->code[f[i].ofst / 4] &= ~f[i].mask; - prog->code[f[i].ofst / 4] |= data & f[i].mask; - } -} - -static void -create_fixup(struct nv_pc *pc, uint8_t ty, - int w, uint32_t data, uint32_t m, int s) -{ - struct nvc0_fixup *f; - - const unsigned size = sizeof(struct nvc0_fixup); - const unsigned n = pc->num_relocs; - - if (!(n % 8)) - pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size); - - f = (struct nvc0_fixup *)pc->reloc_entries; - - f[n].ofst = pc->emit_pos + w * 4; - f[n].type = ty; - f[n].data = data; - f[n].mask = m; - f[n].shift = s; - - ++pc->num_relocs; -} - -static INLINE ubyte -SSIZE(struct nv_instruction *nvi, int s) -{ - return nvi->src[s]->value->reg.size; -} - -static INLINE ubyte -DSIZE(struct nv_instruction *nvi, int d) -{ - return nvi->def[d]->reg.size; -} - -static INLINE struct nv_reg * -SREG(struct nv_ref *ref) -{ - if (!ref) - return NULL; - return &ref->value->join->reg; -} - -static INLINE struct nv_reg * -DREG(struct nv_value *val) -{ - if (!val) - return NULL; - return &val->join->reg; -} - -static INLINE ubyte -SFILE(struct nv_instruction *nvi, int s) -{ - return nvi->src[s]->value->reg.file; -} - -static INLINE ubyte -DFILE(struct nv_instruction *nvi, int d) -{ - return nvi->def[0]->reg.file; -} - -static INLINE void -SID(struct nv_pc *pc, struct nv_ref *ref, int pos) -{ - pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32); -} - -static INLINE void -DID(struct nv_pc *pc, struct nv_value *val, int pos) -{ - pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32); -} - -static INLINE uint32_t -get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */ -{ - assert(ref->value->reg.file == NV_FILE_IMM); - return ref->value->reg.imm.u32; -} - -static INLINE void -set_immd_u32_l(struct nv_pc *pc, uint32_t u32) -{ - pc->emit[0] |= (u32 & 0x3f) << 26; - pc->emit[1] |= u32 >> 6; -} - -static INLINE void -set_immd_u32(struct nv_pc *pc, uint32_t u32) -{ - if ((pc->emit[0] & 0xf) == 0x2) { - set_immd_u32_l(pc, u32); - } else - if ((pc->emit[0] & 0xf) == 0x3) { - assert(!(pc->emit[1] & 0xc000)); - pc->emit[1] |= 0xc000; - assert(!(u32 & 0xfff00000)); - set_immd_u32_l(pc, u32); - } else { - assert(!(pc->emit[1] & 0xc000)); - pc->emit[1] |= 0xc000; - assert(!(u32 & 0xfff)); - set_immd_u32_l(pc, u32 >> 12); - } -} - -static INLINE void -set_immd(struct nv_pc *pc, struct nv_instruction *i, int s) -{ - set_immd_u32(pc, get_immd_u32(i->src[s])); -} - -static INLINE void -DVS(struct nv_pc *pc, struct nv_instruction *i) -{ - uint s = i->def[0]->reg.size; - int n; - for (n = 1; n < 4 && i->def[n]; ++n) - s += i->def[n]->reg.size; - pc->emit[0] |= ((s / 4) - 1) << 5; -} - -static INLINE void -SVS(struct nv_pc *pc, struct nv_ref *src) -{ - pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5; -} - -static void -set_pred(struct nv_pc *pc, struct nv_instruction *i) -{ - if (i->predicate >= 0) { - SID(pc, i->src[i->predicate], 6); - if (i->cc) - pc->emit[0] |= 0x2000; /* negate */ - } else { - pc->emit[0] |= 0x1c00; - } -} - -static INLINE void -set_address_16(struct nv_pc *pc, struct nv_ref *src) -{ - pc->emit[0] |= (src->value->reg.address & 0x003f) << 26; - pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6; -} - -static INLINE unsigned -const_space_index(struct nv_instruction *i, int s) -{ - return SFILE(i, s) - NV_FILE_MEM_C(0); -} - -static void -emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) -{ - pc->emit[0] = 0x00000007; - pc->emit[1] = op << 24; - - if (op == 0x40 || (op >= 0x80 && op <= 0x98)) { - /* bra, exit, ret or kil */ - pc->emit[0] |= 0x1e0; - set_pred(pc, i); - } - - if (i->target) { - int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); - - /* we will need relocations only for global functions */ - /* - create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); - create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); - */ - - pc->emit[0] |= (pcrel & 0x3f) << 26; - pc->emit[1] |= (pcrel >> 6) & 0x3ffff; - } -} - -/* doesn't work for vfetch, export, ld, st, mov ... */ -static void -emit_form_0(struct nv_pc *pc, struct nv_instruction *i) -{ - int s; - - set_pred(pc, i); - - DID(pc, i->def[0], 14); - - for (s = 0; s < 3 && i->src[s]; ++s) { - if (SFILE(i, s) >= NV_FILE_MEM_C(0) && - SFILE(i, s) <= NV_FILE_MEM_C(15)) { - assert(!(pc->emit[1] & 0xc000)); - assert(s <= 1); - pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); - set_address_16(pc, i->src[s]); - } else - if (SFILE(i, s) == NV_FILE_GPR) { - SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20); - } else - if (SFILE(i, s) == NV_FILE_IMM) { - assert(!(pc->emit[1] & 0xc000)); - assert(s == 1 || i->opcode == NV_OP_MOV); - set_immd(pc, i, s); - } - } -} - -static void -emit_form_1(struct nv_pc *pc, struct nv_instruction *i) -{ - int s; - - set_pred(pc, i); - - DID(pc, i->def[0], 14); - - for (s = 0; s < 1 && i->src[s]; ++s) { - if (SFILE(i, s) >= NV_FILE_MEM_C(0) && - SFILE(i, s) <= NV_FILE_MEM_C(15)) { - assert(!(pc->emit[1] & 0xc000)); - assert(s <= 1); - pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10); - set_address_16(pc, i->src[s]); - } else - if (SFILE(i, s) == NV_FILE_GPR) { - SID(pc, i->src[s], 26); - } else - if (SFILE(i, s) == NV_FILE_IMM) { - assert(!(pc->emit[1] & 0xc000)); - assert(s == 1 || i->opcode == NV_OP_MOV); - set_immd(pc, i, s); - } - } -} - -static void -emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i) -{ - if (i->src[0]->mod & NV_MOD_ABS) - pc->emit[0] |= 1 << 7; - if (i->src[0]->mod & NV_MOD_NEG) - pc->emit[0] |= 1 << 9; - if (i->src[1]->mod & NV_MOD_ABS) - pc->emit[0] |= 1 << 6; - if (i->src[1]->mod & NV_MOD_NEG) - pc->emit[0] |= 1 << 8; -} - -static void -emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0x50000000; - - emit_form_0(pc, i); - - emit_neg_abs_1_2(pc, i); - - if (i->saturate) - pc->emit[1] |= 1 << 17; -} - -static void -emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0x58000000; - - emit_form_0(pc, i); - - if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) - pc->emit[1] |= 1 << 25; - - if (i->saturate) - pc->emit[0] |= 1 << 5; -} - -static void -emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0x30000000; - - emit_form_0(pc, i); - - if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) - pc->emit[0] |= 1 << 9; - - if (i->src[2]->mod & NV_MOD_NEG) - pc->emit[0] |= 1 << 8; - - if (i->saturate) - pc->emit[0] |= 1 << 5; -} - -static void -emit_minmax(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0x08000000; - - if (NV_BASEOP(i->opcode) == NV_OP_MAX) - pc->emit[1] |= 0x001e0000; - else - pc->emit[1] |= 0x000e0000; /* predicate ? */ - - emit_form_0(pc, i); - - emit_neg_abs_1_2(pc, i); - - switch (i->opcode) { - case NV_OP_MIN_U32: - case NV_OP_MAX_U32: - pc->emit[0] |= 3; - break; - case NV_OP_MIN_S32: - case NV_OP_MAX_S32: - pc->emit[0] |= 3 | (1 << 5); - break; - case NV_OP_MIN_F32: - case NV_OP_MAX_F32: - default: - break; - } -} - -static void -emit_tex(struct nv_pc *pc, struct nv_instruction *i) -{ - int src1 = i->tex_array + i->tex_dim + i->tex_cube; - - assert(src1 < 6); - - pc->emit[0] = 0x00000086; - pc->emit[1] = 0x80000000; - - switch (i->opcode) { - case NV_OP_TEX: pc->emit[1] = 0x80000000; break; - case NV_OP_TXB: pc->emit[1] = 0x84000000; break; - case NV_OP_TXL: pc->emit[1] = 0x86000000; break; - case NV_OP_TXF: pc->emit[1] = 0x90000000; break; - case NV_OP_TXG: pc->emit[1] = 0xe0000000; break; - default: - assert(0); - break; - } - - if (i->tex_array) - pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */ - if (i->tex_shadow) - pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */ - - set_pred(pc, i); - - DID(pc, i->def[0], 14); - SID(pc, i->src[0], 20); - SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */ - - pc->emit[1] |= i->tex_mask << 14; - pc->emit[1] |= (i->tex_dim - 1) << 20; - if (i->tex_cube) - pc->emit[1] |= 3 << 20; - - assert(i->ext.tex.s < 16); - - pc->emit[1] |= i->ext.tex.t; - pc->emit[1] |= i->ext.tex.s << 8; - - if (i->tex_live) - pc->emit[0] |= 1 << 9; -} - -/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */ -static void -emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0xc8000000; - - set_pred(pc, i); - - DID(pc, i->def[0], 14); - SID(pc, i->src[0], 20); - - pc->emit[0] |= op << 26; - - if (op >= 3) { - if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; - if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; - } else { - assert(!i->src[0]->mod); - } -} - -static void -emit_quadop(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0x48000000; - - set_pred(pc, i); - - assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR); - - DID(pc, i->def[0], 14); - SID(pc, i->src[0], 20); - SID(pc, i->src[0], 26); - - pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */ - pc->emit[1] |= i->quadop; -} - -static void -emit_ddx(struct nv_pc *pc, struct nv_instruction *i) -{ - i->quadop = 0x99; - i->lanes = 4; - i->src[1] = i->src[0]; - emit_quadop(pc, i); -} - -static void -emit_ddy(struct nv_pc *pc, struct nv_instruction *i) -{ - i->quadop = 0xa5; - i->lanes = 5; - i->src[1] = i->src[0]; - emit_quadop(pc, i); -} - -/* preparation op (preex2, presin / convert to fixed point) */ -static void -emit_preop(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0x60000000; - - if (i->opcode == NV_OP_PREEX2) - pc->emit[0] |= 0x20; - - emit_form_1(pc, i); - - if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8; - if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6; -} - -static void -emit_shift(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000003; - - switch (i->opcode) { - case NV_OP_SAR: - pc->emit[0] |= 0x20; /* fall through */ - case NV_OP_SHR: - pc->emit[1] = 0x58000000; - break; - case NV_OP_SHL: - default: - pc->emit[1] = 0x60000000; - break; - } - - emit_form_0(pc, i); -} - -static void -emit_bitop(struct nv_pc *pc, struct nv_instruction *i) -{ - if (SFILE(i, 1) == NV_FILE_IMM) { - pc->emit[0] = 0x00000002; - pc->emit[1] = 0x38000000; - } else { - pc->emit[0] = 0x00000003; - pc->emit[1] = 0x68000000; - } - - switch (i->opcode) { - case NV_OP_OR: - pc->emit[0] |= 0x40; - break; - case NV_OP_XOR: - pc->emit[0] |= 0x80; - break; - case NV_OP_AND: - default: - break; - } - - emit_form_0(pc, i); -} - -static void -emit_set(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - - switch (i->opcode) { - case NV_OP_SET_S32: - pc->emit[0] |= 0x20; /* fall through */ - case NV_OP_SET_U32: - pc->emit[0] |= 0x3; - pc->emit[1] = 0x100e0000; - break; - case NV_OP_SET_F32_AND: - pc->emit[1] = 0x18000000; - break; - case NV_OP_SET_F32_OR: - pc->emit[1] = 0x18200000; - break; - case NV_OP_SET_F32_XOR: - pc->emit[1] = 0x18400000; - break; - case NV_OP_FSET_F32: - pc->emit[0] |= 0x20; /* fall through */ - case NV_OP_SET_F32: - default: - pc->emit[1] = 0x180e0000; - break; - } - - if (DFILE(i, 0) == NV_FILE_PRED) { - pc->emit[0] |= 0x1c000; - pc->emit[1] += 0x08000000; - } - - pc->emit[1] |= i->set_cond << 23; - - emit_form_0(pc, i); - - emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */ -} - -static void -emit_selp(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000004; - pc->emit[1] = 0x20000000; - - emit_form_0(pc, i); - - if (i->cc || (i->src[2]->mod & NV_MOD_NOT)) - pc->emit[1] |= 1 << 20; -} - -static void -emit_slct(struct nv_pc *pc, struct nv_instruction *i) -{ - uint8_t cc = i->set_cond; - - pc->emit[0] = 0x00000000; - - switch (i->opcode) { - case NV_OP_SLCT_S32: - pc->emit[0] |= 0x20; /* fall through */ - case NV_OP_SLCT_U32: - pc->emit[0] |= 0x3; - pc->emit[1] = 0x30000000; - break; - case NV_OP_SLCT_F32: - default: - pc->emit[1] = 0x38000000; - break; - } - - emit_form_0(pc, i); - - if (i->src[2]->mod & NV_MOD_NEG) - cc = nvc0_ir_reverse_cc(cc); - - pc->emit[1] |= cc << 23; -} - -static void -emit_cvt(struct nv_pc *pc, struct nv_instruction *i) -{ - uint32_t rint; - - pc->emit[0] = 0x00000004; - pc->emit[1] = 0x10000000; - - /* if no type conversion specified, get type from opcode */ - if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s) - i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); - - switch (i->ext.cvt.d) { - case NV_TYPE_F32: - switch (i->ext.cvt.s) { - case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ - case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; - } - break; - case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */ - case NV_TYPE_U32: - switch (i->ext.cvt.s) { - case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ - case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; - } - break; - default: - assert(!"cvt: unknown type"); - break; - } - - rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0; - - if (i->opcode == NV_OP_FLOOR) { - pc->emit[0] |= rint; - pc->emit[1] |= 2 << 16; - } else - if (i->opcode == NV_OP_CEIL) { - pc->emit[0] |= rint; - pc->emit[1] |= 4 << 16; - } else - if (i->opcode == NV_OP_TRUNC) { - pc->emit[0] |= rint; - pc->emit[1] |= 6 << 16; - } - - if (i->saturate || i->opcode == NV_OP_SAT) - pc->emit[0] |= 0x20; - - if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS) - pc->emit[0] |= 1 << 6; - if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG) - pc->emit[0] |= 1 << 8; - - pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20; - pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23; - - emit_form_1(pc, i); -} - -static void -emit_interp(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000000; - pc->emit[1] = 0xc07e0000; - - DID(pc, i->def[0], 14); - - set_pred(pc, i); - - if (i->indirect) - SID(pc, i->src[i->indirect], 20); - else - SID(pc, NULL, 20); - - if (i->opcode == NV_OP_PINTERP) { - pc->emit[0] |= 0x040; - SID(pc, i->src[1], 26); - - if (i->src[0]->value->reg.address >= 0x280 && - i->src[0]->value->reg.address <= 0x29c) - pc->emit[0] |= 0x080; /* XXX: ? */ - } else { - SID(pc, NULL, 26); - } - - pc->emit[1] |= i->src[0]->value->reg.address & 0xffff; - - if (i->centroid) - pc->emit[0] |= 0x100; - else - if (i->flat) - pc->emit[0] |= 0x080; -} - -static void -emit_vfetch(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x03f00006; - pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address; - if (i->patch) - pc->emit[0] |= 0x100; - - set_pred(pc, i); - - DVS(pc, i); - DID(pc, i->def[0], 14); - - SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26); -} - -static void -emit_export(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000006; - pc->emit[1] = 0x0a000000; - if (i->patch) - pc->emit[0] |= 0x100; - - set_pred(pc, i); - - assert(SFILE(i, 0) == NV_FILE_MEM_V); - assert(SFILE(i, 1) == NV_FILE_GPR); - - SID(pc, i->src[1], 26); /* register source */ - SVS(pc, i->src[0]); - - pc->emit[1] |= i->src[0]->value->reg.address & 0xfff; - - SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); -} - -static void -emit_mov(struct nv_pc *pc, struct nv_instruction *i) -{ - if (i->opcode == NV_OP_MOV) - i->lanes = 0xf; - - if (SFILE(i, 0) == NV_FILE_IMM) { - pc->emit[0] = 0x000001e2; - pc->emit[1] = 0x18000000; - } else - if (SFILE(i, 0) == NV_FILE_PRED) { - pc->emit[0] = 0x1c000004; - pc->emit[1] = 0x080e0000; - } else { - pc->emit[0] = 0x00000004 | (i->lanes << 5); - pc->emit[1] = 0x28000000; - } - - emit_form_1(pc, i); -} - -static void -emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(NV_IS_MEMORY_FILE(SFILE(i, 0))); - - switch (SSIZE(i, 0)) { - case 1: - if (NV_TYPE_ISSGD(i->ext.cvt.s)) - pc->emit[0] |= 0x20; - break; - case 2: - pc->emit[0] |= 0x40; - if (NV_TYPE_ISSGD(i->ext.cvt.s)) - pc->emit[0] |= 0x20; - break; - case 4: pc->emit[0] |= 0x80; break; - case 8: pc->emit[0] |= 0xa0; break; - case 16: pc->emit[0] |= 0xc0; break; - default: - NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0)); - break; - } -} - -static void -emit_ld_common(struct nv_pc *pc, struct nv_instruction *i) -{ - emit_ldst_size(pc, i); - - set_pred(pc, i); - set_address_16(pc, i->src[0]); - - SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); - DID(pc, i->def[0], 14); -} - -static void -emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x00000006; - pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); - - emit_ld_common(pc, i); -} - -static void -emit_ld(struct nv_pc *pc, struct nv_instruction *i) -{ - if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && - SFILE(i, 0) <= NV_FILE_MEM_C(15)) { - if (SSIZE(i, 0) == 4 && i->indirect < 0) { - i->lanes = 0xf; - emit_mov(pc, i); - } else { - emit_ld_const(pc, i); - } - } else - if (SFILE(i, 0) == NV_FILE_MEM_L) { - pc->emit[0] = 0x00000005; - pc->emit[1] = 0xc0000000; - - emit_ld_common(pc, i); - } else { - NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); - abort(); - } -} - -static void -emit_st(struct nv_pc *pc, struct nv_instruction *i) -{ - if (SFILE(i, 0) != NV_FILE_MEM_L) - NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0)); - - pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */ - pc->emit[1] = 0xc8000000; - - emit_ldst_size(pc, i); - - set_pred(pc, i); - set_address_16(pc, i->src[0]); - - SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); - DID(pc, i->src[1]->value, 14); -} - -void -nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) -{ -#if NV50_DEBUG & NV50_DEBUG_SHADER - debug_printf("EMIT: "); nvc0_print_instruction(i); -#endif - - switch (i->opcode) { - case NV_OP_VFETCH: - emit_vfetch(pc, i); - break; - case NV_OP_EXPORT: - if (!pc->is_fragprog) - emit_export(pc, i); - break; - case NV_OP_MOV: - emit_mov(pc, i); - break; - case NV_OP_LD: - emit_ld(pc, i); - break; - case NV_OP_ST: - emit_st(pc, i); - break; - case NV_OP_LINTERP: - case NV_OP_PINTERP: - emit_interp(pc, i); - break; - case NV_OP_ADD_F32: - emit_add_f32(pc, i); - break; - case NV_OP_AND: - case NV_OP_OR: - case NV_OP_XOR: - emit_bitop(pc, i); - break; - case NV_OP_CVT: - case NV_OP_ABS_F32: - case NV_OP_ABS_S32: - case NV_OP_NEG_F32: - case NV_OP_NEG_S32: - case NV_OP_SAT: - case NV_OP_CEIL: - case NV_OP_FLOOR: - case NV_OP_TRUNC: - emit_cvt(pc, i); - break; - case NV_OP_DFDX: - emit_ddx(pc, i); - break; - case NV_OP_DFDY: - emit_ddy(pc, i); - break; - case NV_OP_COS: - emit_flop(pc, i, 0); - break; - case NV_OP_SIN: - emit_flop(pc, i, 1); - break; - case NV_OP_EX2: - emit_flop(pc, i, 2); - break; - case NV_OP_LG2: - emit_flop(pc, i, 3); - break; - case NV_OP_RCP: - emit_flop(pc, i, 4); - break; - case NV_OP_RSQ: - emit_flop(pc, i, 5); - break; - case NV_OP_PRESIN: - case NV_OP_PREEX2: - emit_preop(pc, i); - break; - case NV_OP_MAD_F32: - emit_mad_f32(pc, i); - break; - case NV_OP_MAX_F32: - case NV_OP_MAX_S32: - case NV_OP_MAX_U32: - case NV_OP_MIN_F32: - case NV_OP_MIN_S32: - case NV_OP_MIN_U32: - emit_minmax(pc, i); - break; - case NV_OP_MUL_F32: - emit_mul_f32(pc, i); - break; - case NV_OP_SET_F32: - case NV_OP_SET_F32_AND: - case NV_OP_SET_F32_OR: - case NV_OP_SET_F32_XOR: - case NV_OP_SET_S32: - case NV_OP_SET_U32: - case NV_OP_FSET_F32: - emit_set(pc, i); - break; - case NV_OP_SHL: - case NV_OP_SHR: - case NV_OP_SAR: - emit_shift(pc, i); - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - emit_tex(pc, i); - break; - case NV_OP_BRA: - emit_flow(pc, i, 0x40); - break; - case NV_OP_CALL: - emit_flow(pc, i, 0x50); - break; - case NV_OP_JOINAT: - emit_flow(pc, i, 0x60); - break; - case NV_OP_EXIT: - emit_flow(pc, i, 0x80); - break; - case NV_OP_RET: - emit_flow(pc, i, 0x90); - break; - case NV_OP_KIL: - emit_flow(pc, i, 0x98); - break; - case NV_OP_JOIN: - case NV_OP_NOP: - pc->emit[0] = 0x00003de4; - pc->emit[1] = 0x40000000; - break; - case NV_OP_SELP: - emit_selp(pc, i); - break; - case NV_OP_SLCT_F32: - case NV_OP_SLCT_S32: - case NV_OP_SLCT_U32: - emit_slct(pc, i); - break; - default: - NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode); - abort(); - break; - } - - if (i->join) - pc->emit[0] |= 0x10; -} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c deleted file mode 100644 index 82a8397238d..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ /dev/null @@ -1,1434 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "nvc0_pc.h" -#include "nvc0_program.h" - -#define DESCEND_ARBITRARY(j, f) \ -do { \ - b->pass_seq = ctx->pc->pass_seq; \ - \ - for (j = 0; j < 2; ++j) \ - if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \ - f(ctx, b->out[j]); \ -} while (0) - -static INLINE boolean -registers_interfere(struct nv_value *a, struct nv_value *b) -{ - if (a->reg.file != b->reg.file) - return FALSE; - if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file)) - return FALSE; - - assert(a->join->reg.id >= 0 && b->join->reg.id >= 0); - - if (a->join->reg.id < b->join->reg.id) { - return (a->join->reg.id + a->reg.size >= b->join->reg.id); - } else - if (a->join->reg.id > b->join->reg.id) { - return (b->join->reg.id + b->reg.size >= a->join->reg.id); - } - - return FALSE; -} - -static INLINE boolean -values_equal(struct nv_value *a, struct nv_value *b) -{ - if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) - return FALSE; - if (NV_IS_MEMORY_FILE(a->reg.file)) - return a->reg.address == b->reg.address; - else - return a->join->reg.id == b->join->reg.id; -} - -#if 0 -static INLINE boolean -inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b) -{ - int si, di; - - for (di = 0; di < 4 && a->def[di]; ++di) - for (si = 0; si < 5 && b->src[si]; ++si) - if (registers_interfere(a->def[di], b->src[si]->value)) - return FALSE; - - return TRUE; -} - -/* Check whether we can swap the order of the instructions, - * where a & b may be either the earlier or the later one. - */ -static boolean -inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b) -{ - return inst_commutation_check(a, b) && inst_commutation_check(b, a); -} -#endif - -static INLINE boolean -inst_removable(struct nv_instruction *nvi) -{ - if (nvi->opcode == NV_OP_ST) - return FALSE; - return (!(nvi->terminator || - nvi->join || - nvi->target || - nvi->fixed || - nvc0_insn_refcount(nvi))); -} - -/* Check if we do not actually have to emit this instruction. */ -static INLINE boolean -inst_is_noop(struct nv_instruction *nvi) -{ - if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND) - return TRUE; - if (nvi->terminator || nvi->join) - return FALSE; - if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) - return TRUE; - if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT) - return FALSE; - if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file) - return FALSE; - - if (nvi->src[0]->value->join->reg.id < 0) { - NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n"); - return TRUE; - } - - if (nvi->opcode == NV_OP_SELECT) - if (!values_equal(nvi->def[0], nvi->src[1]->value)) - return FALSE; - return values_equal(nvi->def[0], nvi->src[0]->value); -} - -struct nv_pass { - struct nv_pc *pc; - int n; - void *priv; -}; - -static int -nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b); - -static void -nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) -{ - struct nv_pc *pc = (struct nv_pc *)priv; - struct nv_basic_block *in; - struct nv_instruction *nvi, *next; - int j; - - /* find first non-empty block emitted before b */ - for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); - - for (; j >= 0; --j) { - in = pc->bb_list[j]; - - /* check for no-op branches (BRA $PC+8) */ - if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) { - in->emit_size -= 8; - pc->emit_size -= 8; - - for (++j; j < pc->num_blocks; ++j) - pc->bb_list[j]->emit_pos -= 8; - - nvc0_insn_delete(in->exit); - } - b->emit_pos = in->emit_pos + in->emit_size; - - if (in->emit_size) /* no more no-op branches to b */ - break; - } - - pc->bb_list[pc->num_blocks++] = b; - - /* visit node */ - - for (nvi = b->entry; nvi; nvi = next) { - next = nvi->next; - if (inst_is_noop(nvi) || - (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) { - nvc0_insn_delete(nvi); - } else - b->emit_size += 8; - } - pc->emit_size += b->emit_size; - -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - if (!b->entry) - debug_printf("BB:%i is now empty\n", b->id); - else - debug_printf("BB:%i size = %u\n", b->id, b->emit_size); -#endif -} - -static int -nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) -{ - struct nv_pass pass; - - pass.pc = pc; - - pc->pass_seq++; - nv_pass_flatten(&pass, root); - - nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); - - return 0; -} - -int -nvc0_pc_exec_pass2(struct nv_pc *pc) -{ - int i, ret; - - NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks); - - pc->num_blocks = 0; /* will reorder bb_list */ - - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) - return ret; - return 0; -} - -static INLINE boolean -is_cspace_load(struct nv_instruction *nvi) -{ - if (!nvi) - return FALSE; - assert(nvi->indirect != 0); - return (nvi->opcode == NV_OP_LD && - nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && - nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15)); -} - -static INLINE boolean -is_immd32_load(struct nv_instruction *nvi) -{ - if (!nvi) - return FALSE; - return (nvi->opcode == NV_OP_MOV && - nvi->src[0]->value->reg.file == NV_FILE_IMM && - nvi->src[0]->value->reg.size == 4); -} - -static INLINE void -check_swap_src_0_1(struct nv_instruction *nvi) -{ - struct nv_ref *src0 = nvi->src[0]; - struct nv_ref *src1 = nvi->src[1]; - - if (!nv_op_commutative(nvi->opcode) && - NV_BASEOP(nvi->opcode) != NV_OP_SET && - NV_BASEOP(nvi->opcode) != NV_OP_SLCT) - return; - assert(src0 && src1 && src0->value && src1->value); - - if (src1->value->reg.file != NV_FILE_GPR) - return; - - if (is_cspace_load(src0->value->insn)) { - if (!is_cspace_load(src1->value->insn)) { - nvi->src[0] = src1; - nvi->src[1] = src0; - } - } else - if (is_immd32_load(src0->value->insn)) { - if (!is_cspace_load(src1->value->insn) && - !is_immd32_load(src1->value->insn)) { - nvi->src[0] = src1; - nvi->src[1] = src0; - } - } - - if (nvi->src[0] != src0) { - if (NV_BASEOP(nvi->opcode) == NV_OP_SET) - nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond); - else - if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) - nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); - } -} - -static void -nvi_set_indirect_load(struct nv_pc *pc, - struct nv_instruction *nvi, struct nv_value *val) -{ - for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect]; - ++nvi->indirect); - assert(nvi->indirect < 6); - nv_reference(pc, nvi, nvi->indirect, val); -} - -static int -nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi, *ld; - int s; - - for (nvi = b->entry; nvi; nvi = nvi->next) { - check_swap_src_0_1(nvi); - - for (s = 0; s < 3 && nvi->src[s]; ++s) { - ld = nvi->src[s]->value->insn; - if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV)) - continue; - if (!nvc0_insn_can_load(nvi, s, ld)) - continue; - - /* fold it ! */ - nv_reference(ctx->pc, nvi, s, ld->src[0]->value); - if (ld->indirect >= 0) - nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value); - - if (!nvc0_insn_refcount(ld)) - nvc0_insn_delete(ld); - } - } - DESCEND_ARBITRARY(s, nvc0_pass_fold_loads); - - return 0; -} - -/* NOTE: Assumes loads have not yet been folded. */ -static int -nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi, *mi, *next; - int j; - uint8_t mod; - - for (nvi = b->entry; nvi; nvi = next) { - next = nvi->next; - if (nvi->opcode == NV_OP_SUB) { - nvi->src[1]->mod ^= NV_MOD_NEG; - nvi->opcode = NV_OP_ADD; - } - - for (j = 0; j < 3 && nvi->src[j]; ++j) { - mi = nvi->src[j]->value->insn; - if (!mi) - continue; - if (mi->def[0]->refc > 1 || mi->predicate >= 0) - continue; - - if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG; - else - if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS; - else - continue; - assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); - - mod |= mi->src[0]->mod; - - if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { - /* abs neg [abs] = abs */ - mod &= ~(NV_MOD_NEG | NV_MOD_ABS); - } else - if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { - /* neg as opcode and modifier on same insn cannot occur */ - /* neg neg abs = abs, neg neg = identity */ - assert(j == 0); - if (mod & NV_MOD_ABS) - nvi->opcode = NV_OP_ABS; - else - nvi->opcode = NV_OP_MOV; - mod = 0; - } - - if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod) - continue; - - nv_reference(ctx->pc, nvi, j, mi->src[0]->value); - - nvi->src[j]->mod ^= mod; - } - - if (nvi->opcode == NV_OP_SAT) { - mi = nvi->src[0]->value->insn; - - if (mi->def[0]->refc > 1 || - (mi->opcode != NV_OP_ADD && - mi->opcode != NV_OP_MUL && - mi->opcode != NV_OP_MAD)) - continue; - mi->saturate = 1; - mi->def[0] = nvi->def[0]; - mi->def[0]->insn = mi; - nvc0_insn_delete(nvi); - } - } - DESCEND_ARBITRARY(j, nv_pass_lower_mods); - - return 0; -} - -#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) - -static void -apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) -{ - if (mod & NV_MOD_ABS) { - if (type == NV_TYPE_F32) - *val &= 0x7fffffff; - else - if ((*val) & (1 << 31)) - *val = ~(*val) + 1; - } - if (mod & NV_MOD_NEG) { - if (type == NV_TYPE_F32) - *val ^= 0x80000000; - else - *val = ~(*val) + 1; - } - if (mod & NV_MOD_SAT) { - union { - float f; - uint32_t u; - int32_t i; - } u; - u.u = *val; - if (type == NV_TYPE_F32) { - u.f = CLAMP(u.f, -1.0f, 1.0f); - } else - if (type == NV_TYPE_U16) { - u.u = MIN2(u.u, 0xffff); - } else - if (type == NV_TYPE_S16) { - u.i = CLAMP(u.i, -32768, 32767); - } - *val = u.u; - } - if (mod & NV_MOD_NOT) - *val = ~*val; -} - -static void -constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, - struct nv_value *src0, struct nv_value *src1) -{ - struct nv_value *val; - union { - float f32; - uint32_t u32; - int32_t s32; - } u0, u1, u; - ubyte type; - - if (!nvi->def[0]) - return; - type = NV_OPTYPE(nvi->opcode); - - u.u32 = 0; - u0.u32 = src0->reg.imm.u32; - u1.u32 = src1->reg.imm.u32; - - apply_modifiers(&u0.u32, type, nvi->src[0]->mod); - apply_modifiers(&u1.u32, type, nvi->src[1]->mod); - - switch (nvi->opcode) { - case NV_OP_MAD_F32: - if (nvi->src[2]->value->reg.file != NV_FILE_GPR) - return; - /* fall through */ - case NV_OP_MUL_F32: - u.f32 = u0.f32 * u1.f32; - break; - case NV_OP_MUL_B32: - u.u32 = u0.u32 * u1.u32; - break; - case NV_OP_ADD_F32: - u.f32 = u0.f32 + u1.f32; - break; - case NV_OP_ADD_B32: - u.u32 = u0.u32 + u1.u32; - break; - case NV_OP_SUB_F32: - u.f32 = u0.f32 - u1.f32; - break; - /* - case NV_OP_SUB_B32: - u.u32 = u0.u32 - u1.u32; - break; - */ - default: - return; - } - - val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type)); - val->reg.imm.u32 = u.u32; - - nv_reference(pc, nvi, 1, NULL); - nv_reference(pc, nvi, 0, val); - - if (nvi->opcode == NV_OP_MAD_F32) { - nvi->src[1] = nvi->src[0]; - nvi->src[0] = nvi->src[2]; - nvi->src[2] = NULL; - nvi->opcode = NV_OP_ADD_F32; - - if (val->reg.imm.u32 == 0) { - nvi->src[1] = NULL; - nvi->opcode = NV_OP_MOV; - } - } else { - nvi->opcode = NV_OP_MOV; - } -} - -static void -constant_operand(struct nv_pc *pc, - struct nv_instruction *nvi, struct nv_value *val, int s) -{ - union { - float f32; - uint32_t u32; - int32_t s32; - } u; - int shift; - int t = s ? 0 : 1; - uint op; - ubyte type; - - if (!nvi->def[0]) - return; - type = NV_OPTYPE(nvi->opcode); - - u.u32 = val->reg.imm.u32; - apply_modifiers(&u.u32, type, nvi->src[s]->mod); - - if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) { - nvi->opcode = NV_OP_MOV; - nv_reference(pc, nvi, t, NULL); - if (s) { - nvi->src[0] = nvi->src[1]; - nvi->src[1] = NULL; - } - return; - } - - switch (nvi->opcode) { - case NV_OP_MUL_F32: - if (u.f32 == 1.0f || u.f32 == -1.0f) { - if (u.f32 == -1.0f) - nvi->src[t]->mod ^= NV_MOD_NEG; - switch (nvi->src[t]->mod) { - case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break; - case NV_MOD_NEG: op = NV_OP_NEG_F32; break; - case NV_MOD_ABS: op = NV_OP_ABS_F32; break; - default: - return; - } - nvi->opcode = op; - nv_reference(pc, nvi, 0, nvi->src[t]->value); - nv_reference(pc, nvi, 1, NULL); - nvi->src[0]->mod = 0; - } else - if (u.f32 == 2.0f || u.f32 == -2.0f) { - if (u.f32 == -2.0f) - nvi->src[t]->mod ^= NV_MOD_NEG; - nvi->opcode = NV_OP_ADD_F32; - nv_reference(pc, nvi, s, nvi->src[t]->value); - nvi->src[s]->mod = nvi->src[t]->mod; - } - break; - case NV_OP_ADD_F32: - if (u.u32 == 0) { - switch (nvi->src[t]->mod) { - case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break; - case NV_MOD_NEG: op = NV_OP_NEG_F32; break; - case NV_MOD_ABS: op = NV_OP_ABS_F32; break; - case NV_MOD_NEG | NV_MOD_ABS: - op = NV_OP_CVT; - nvi->ext.cvt.s = nvi->ext.cvt.d = type; - break; - default: - return; - } - nvi->opcode = op; - nv_reference(pc, nvi, 0, nvi->src[t]->value); - nv_reference(pc, nvi, 1, NULL); - if (nvi->opcode != NV_OP_CVT) - nvi->src[0]->mod = 0; - } - break; - case NV_OP_ADD_B32: - if (u.u32 == 0) { - assert(nvi->src[t]->mod == 0); - nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV; - nvi->ext.cvt.s = nvi->ext.cvt.d = type; - nv_reference(pc, nvi, 0, nvi->src[t]->value); - nv_reference(pc, nvi, 1, NULL); - } - break; - case NV_OP_MUL_B32: - /* multiplication by 0 already handled above */ - assert(nvi->src[s]->mod == 0); - shift = ffs(u.s32) - 1; - if (shift == 0) { - nvi->opcode = NV_OP_MOV; - nv_reference(pc, nvi, 0, nvi->src[t]->value); - nv_reference(pc, nvi, 1, NULL); - } else - if (u.s32 > 0 && u.s32 == (1 << shift)) { - nvi->opcode = NV_OP_SHL; - (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift; - nv_reference(pc, nvi, 0, nvi->src[t]->value); - nv_reference(pc, nvi, 1, val); - break; - } - break; - case NV_OP_RCP: - u.f32 = 1.0f / u.f32; - (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; - nvi->opcode = NV_OP_MOV; - assert(s == 0); - nv_reference(pc, nvi, 0, val); - break; - case NV_OP_RSQ: - u.f32 = 1.0f / sqrtf(u.f32); - (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; - nvi->opcode = NV_OP_MOV; - assert(s == 0); - nv_reference(pc, nvi, 0, val); - break; - default: - break; - } -} - -static void -handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi) -{ - struct nv_value *src0 = nvi->src[0]->value; - struct nv_value *src1 = nvi->src[1]->value; - - if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod)) - return; - if (src0->reg.file != NV_FILE_GPR) - return; - nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0); - nvc0_insn_delete(nvi); -} - -/* check if we can MUL + ADD -> MAD/FMA */ -static void -handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi) -{ - struct nv_value *src0 = nvi->src[0]->value; - struct nv_value *src1 = nvi->src[1]->value; - struct nv_value *src; - int s; - uint8_t mod[4]; - - if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0; - else - if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1; - else - return; - - if ((src0->insn && src0->insn->bb != nvi->bb) || - (src1->insn && src1->insn->bb != nvi->bb)) - return; - - /* check for immediates from prior constant folding */ - if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) - return; - src = nvi->src[s]->value; - - mod[0] = nvi->src[0]->mod; - mod[1] = nvi->src[1]->mod; - mod[2] = src->insn->src[0]->mod; - mod[3] = src->insn->src[1]->mod; - - if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) - return; - - nvi->opcode = NV_OP_MAD_F32; - - nv_reference(ctx->pc, nvi, s, NULL); - nvi->src[2] = nvi->src[!s]; - nvi->src[!s] = NULL; - - nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value); - nvi->src[0]->mod = mod[2] ^ mod[s]; - nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value); - nvi->src[1]->mod = mod[3]; -} - -static int -nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi, *next; - int j; - - for (nvi = b->entry; nvi; nvi = next) { - struct nv_value *src0, *src1; - uint baseop = NV_BASEOP(nvi->opcode); - - next = nvi->next; - - src0 = nvc0_pc_find_immediate(nvi->src[0]); - src1 = nvc0_pc_find_immediate(nvi->src[1]); - - if (src0 && src1) { - constant_expression(ctx->pc, nvi, src0, src1); - } else { - if (src0) - constant_operand(ctx->pc, nvi, src0, 0); - else - if (src1) - constant_operand(ctx->pc, nvi, src1, 1); - } - - if (baseop == NV_OP_MIN || baseop == NV_OP_MAX) - handle_min_max(ctx, nvi); - else - if (nvi->opcode == NV_OP_ADD_F32) - handle_add_mul(ctx, nvi); - } - DESCEND_ARBITRARY(j, nv_pass_algebraic_opt); - - return 0; -} - -/* TODO: redundant store elimination */ - -struct mem_record { - struct mem_record *next; - struct nv_instruction *insn; - uint32_t ofst; - uint32_t base; - uint32_t size; -}; - -#define MEM_RECORD_POOL_SIZE 1024 - -struct pass_reld_elim { - struct nv_pc *pc; - - struct mem_record *imm; - struct mem_record *mem_v; - struct mem_record *mem_a; - struct mem_record *mem_c[16]; - struct mem_record *mem_l; - - struct mem_record pool[MEM_RECORD_POOL_SIZE]; - int alloc; -}; - -/* Extend the load operation in @rec to also cover the data loaded by @ld. - * The two loads may not overlap but reference adjacent memory locations. - */ -static void -combine_load(struct nv_pc *pc, struct mem_record *rec, - struct nv_instruction *ld) -{ - struct nv_instruction *fv = rec->insn; - struct nv_value *mem = ld->src[0]->value; - uint32_t size = rec->size + mem->reg.size; - int j; - int d = rec->size / 4; - - assert(rec->size < 16); - if (rec->ofst > mem->reg.address) { - if ((size == 8 && mem->reg.address & 3) || - (size > 8 && mem->reg.address & 7)) - return; - rec->ofst = mem->reg.address; - for (j = 0; j < d; ++j) - fv->def[mem->reg.size / 4 + j] = fv->def[j]; - d = 0; - } else - if ((size == 8 && rec->ofst & 3) || - (size > 8 && rec->ofst & 7)) { - return; - } - - for (j = 0; j < mem->reg.size / 4; ++j) { - fv->def[d] = ld->def[j]; - fv->def[d++]->insn = fv; - } - - if (fv->src[0]->value->refc > 1) - nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value)); - fv->src[0]->value->reg.address = rec->ofst; - fv->src[0]->value->reg.size = rec->size = size; - - nvc0_insn_delete(ld); -} - -static void -combine_export(struct mem_record *rec, struct nv_instruction *ex) -{ - -} - -static INLINE void -add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec, - uint32_t base, uint32_t ofst, struct nv_instruction *nvi) -{ - struct mem_record *it = &ctx->pool[ctx->alloc++]; - - it->next = *rec; - *rec = it; - it->base = base; - it->ofst = ofst; - it->insn = nvi; - it->size = nvi->src[0]->value->reg.size; -} - -/* vectorize and reuse loads from memory or of immediates */ -static int -nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) -{ - struct mem_record **rec, *it; - struct nv_instruction *ld, *next; - struct nv_value *mem; - uint32_t base, ofst; - int s; - - for (ld = b->entry; ld; ld = next) { - next = ld->next; - - if (is_cspace_load(ld)) { - mem = ld->src[0]->value; - rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)]; - } else - if (ld->opcode == NV_OP_VFETCH) { - mem = ld->src[0]->value; - rec = &ctx->mem_a; - } else - if (ld->opcode == NV_OP_EXPORT) { - mem = ld->src[0]->value; - if (mem->reg.file != NV_FILE_MEM_V) - continue; - rec = &ctx->mem_v; - } else { - continue; - } - if (ld->def[0] && ld->def[0]->refc == 0) - continue; - ofst = mem->reg.address; - base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0; - - for (it = *rec; it; it = it->next) { - if (it->base == base && - ((it->ofst >> 4) == (ofst >> 4)) && - ((it->ofst + it->size == ofst) || - (it->ofst - mem->reg.size == ofst))) { - /* only NV_OP_VFETCH can load exactly 12 bytes */ - if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) - continue; - if (it->ofst < ofst) { - if ((it->ofst & 0xf) == 4) - continue; - } else - if ((ofst & 0xf) == 4) - continue; - break; - } - } - if (it) { - switch (ld->opcode) { - case NV_OP_EXPORT: combine_export(it, ld); break; - default: - combine_load(ctx->pc, it, ld); - break; - } - } else - if (ctx->alloc < MEM_RECORD_POOL_SIZE) { - add_mem_record(ctx, rec, base, ofst, ld); - } - } - - ctx->alloc = 0; - ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL; - for (s = 0; s < 16; ++s) - ctx->mem_c[s] = NULL; - - DESCEND_ARBITRARY(s, nv_pass_mem_opt); - return 0; -} - -static void -eliminate_store(struct mem_record *rec, struct nv_instruction *st) -{ -} - -/* elimination of redundant stores */ -static int -pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) -{ - struct mem_record **rec, *it; - struct nv_instruction *st, *next; - struct nv_value *mem; - uint32_t base, ofst, size; - int s; - - for (st = b->entry; st; st = next) { - next = st->next; - - if (st->opcode == NV_OP_ST) { - mem = st->src[0]->value; - rec = &ctx->mem_l; - } else - if (st->opcode == NV_OP_EXPORT) { - mem = st->src[0]->value; - if (mem->reg.file != NV_FILE_MEM_V) - continue; - rec = &ctx->mem_v; - } else - if (st->opcode == NV_OP_ST) { - /* TODO: purge */ - } - ofst = mem->reg.address; - base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0; - size = mem->reg.size; - - for (it = *rec; it; it = it->next) { - if (it->base == base && - (it->ofst <= ofst && (it->ofst + size) > ofst)) - break; - } - if (it) - eliminate_store(it, st); - else - add_mem_record(ctx, rec, base, ofst, st); - } - - DESCEND_ARBITRARY(s, nv_pass_mem_opt); - return 0; -} - -/* TODO: properly handle loads from l[] memory in the presence of stores */ -static int -nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b) -{ -#if 0 - struct load_record **rec, *it; - struct nv_instruction *ld, *next; - uint64_t data[2]; - struct nv_value *val; - int j; - - for (ld = b->entry; ld; ld = next) { - next = ld->next; - if (!ld->src[0]) - continue; - val = ld->src[0]->value; - rec = NULL; - - if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { - data[0] = val->reg.id; - data[1] = 0; - rec = &ctx->mem_v; - } else - if (ld->opcode == NV_OP_LDA) { - data[0] = val->reg.id; - data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL; - if (val->reg.file >= NV_FILE_MEM_C(0) && - val->reg.file <= NV_FILE_MEM_C(15)) - rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; - else - if (val->reg.file == NV_FILE_MEM_S) - rec = &ctx->mem_s; - else - if (val->reg.file == NV_FILE_MEM_L) - rec = &ctx->mem_l; - } else - if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { - data[0] = val->reg.imm.u32; - data[1] = 0; - rec = &ctx->imm; - } - - if (!rec || !ld->def[0]->refc) - continue; - - for (it = *rec; it; it = it->next) - if (it->data[0] == data[0] && it->data[1] == data[1]) - break; - - if (it) { - if (ld->def[0]->reg.id >= 0) - it->value = ld->def[0]; - else - if (!ld->fixed) - nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value); - } else { - if (ctx->alloc == LOAD_RECORD_POOL_SIZE) - continue; - it = &ctx->pool[ctx->alloc++]; - it->next = *rec; - it->data[0] = data[0]; - it->data[1] = data[1]; - it->value = ld->def[0]; - *rec = it; - } - } - - ctx->imm = NULL; - ctx->mem_s = NULL; - ctx->mem_v = NULL; - for (j = 0; j < 16; ++j) - ctx->mem_c[j] = NULL; - ctx->mem_l = NULL; - ctx->alloc = 0; - - DESCEND_ARBITRARY(j, nv_pass_reload_elim); -#endif - return 0; -} - -static int -nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b) -{ - int i, c, j; - - for (i = 0; i < ctx->pc->num_instructions; ++i) { - struct nv_instruction *nvi = &ctx->pc->instructions[i]; - struct nv_value *def[4]; - - if (!nv_is_texture_op(nvi->opcode)) - continue; - nvi->tex_mask = 0; - - for (c = 0; c < 4; ++c) { - if (nvi->def[c]->refc) - nvi->tex_mask |= 1 << c; - def[c] = nvi->def[c]; - } - - j = 0; - for (c = 0; c < 4; ++c) - if (nvi->tex_mask & (1 << c)) - nvi->def[j++] = def[c]; - for (c = 0; c < 4; ++c) - if (!(nvi->tex_mask & (1 << c))) - nvi->def[j++] = def[c]; - assert(j == 4); - } - return 0; -} - -struct nv_pass_dce { - struct nv_pc *pc; - uint removed; -}; - -static int -nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) -{ - int j; - struct nv_instruction *nvi, *next; - - for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { - next = nvi->next; - - if (inst_removable(nvi)) { - nvc0_insn_delete(nvi); - ++ctx->removed; - } - } - DESCEND_ARBITRARY(j, nv_pass_dce); - - return 0; -} - -/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. - * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with - * BREAK and dummy ELSE block. - */ -static INLINE boolean -bb_is_if_else_endif(struct nv_basic_block *bb) -{ - if (!bb->out[0] || !bb->out[1]) - return FALSE; - - if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { - return (bb->out[0]->out[1] == bb->out[1]->out[0] && - !bb->out[1]->out[1]); - } else { - return (bb->out[0]->out[0] == bb->out[1]->out[0] && - !bb->out[0]->out[1] && - !bb->out[1]->out[1]); - } -} - -/* Predicate instructions and delete any branch at the end if it is - * not a break from a loop. - */ -static void -predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *pred, uint8_t cc) -{ - struct nv_instruction *nvi, *prev; - int s; - - if (!b->entry) - return; - for (nvi = b->entry; nvi; nvi = nvi->next) { - prev = nvi; - if (inst_is_noop(nvi)) - continue; - for (s = 0; nvi->src[s]; ++s); - assert(s < 6); - nvi->predicate = s; - nvi->cc = cc; - nv_reference(pc, nvi, nvi->predicate, pred); - } - if (prev->opcode == NV_OP_BRA && - b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && - b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) - nvc0_insn_delete(prev); -} - -static INLINE boolean -may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) -{ - if (nvi->def[0] && values_equal(nvi->def[0], pred)) - return FALSE; - return nvc0_insn_is_predicateable(nvi); -} - -/* Transform IF/ELSE/ENDIF constructs into predicated instructions - * where feasible. - */ -static int -nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi; - struct nv_value *pred; - int k; - int n0, n1; /* instruction counts of outgoing blocks */ - - if (bb_is_if_else_endif(b)) { - assert(b->exit && b->exit->opcode == NV_OP_BRA); - - assert(b->exit->predicate >= 0); - pred = b->exit->src[b->exit->predicate]->value; - - n1 = n0 = 0; - for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) - if (!may_predicate_insn(nvi, pred)) - break; - if (!nvi) { - /* we're after register allocation, so there always is an ELSE block */ - for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) - if (!may_predicate_insn(nvi, pred)) - break; - } - - /* 12 is an arbitrary limit */ - if (!nvi && n0 < 12 && n1 < 12) { - predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); - predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); - - nvc0_insn_delete(b->exit); /* delete the branch */ - - /* and a potential joinat before it */ - if (b->exit && b->exit->opcode == NV_OP_JOINAT) - nvc0_insn_delete(b->exit); - - /* remove join operations at the end of the conditional */ - k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; - if ((nvi = b->out[0]->out[k]->entry)) { - nvi->join = 0; - if (nvi->opcode == NV_OP_JOIN) - nvc0_insn_delete(nvi); - } - } - } - DESCEND_ARBITRARY(k, nv_pass_flatten); - - return 0; -} - -/* Tests instructions for equality, but independently of sources. */ -static boolean -is_operation_equal(struct nv_instruction *a, struct nv_instruction *b) -{ - if (a->opcode != b->opcode) - return FALSE; - if (nv_is_texture_op(a->opcode)) { - if (a->ext.tex.t != b->ext.tex.t || - a->ext.tex.s != b->ext.tex.s) - return FALSE; - if (a->tex_dim != b->tex_dim || - a->tex_array != b->tex_array || - a->tex_cube != b->tex_cube || - a->tex_shadow != b->tex_shadow || - a->tex_live != b->tex_live) - return FALSE; - } else - if (a->opcode == NV_OP_CVT) { - if (a->ext.cvt.s != b->ext.cvt.s || - a->ext.cvt.d != b->ext.cvt.d) - return FALSE; - } else - if (NV_BASEOP(a->opcode) == NV_OP_SET || - NV_BASEOP(a->opcode) == NV_OP_SLCT) { - if (a->set_cond != b->set_cond) - return FALSE; - } else - if (a->opcode == NV_OP_LINTERP || - a->opcode == NV_OP_PINTERP) { - if (a->centroid != b->centroid || - a->flat != b->flat) - return FALSE; - } - if (a->cc != b->cc) - return FALSE; - if (a->lanes != b->lanes || - a->patch != b->patch || - a->saturate != b->saturate) - return FALSE; - if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */ - return FALSE; - return TRUE; -} - -/* local common subexpression elimination, stupid O(n^2) implementation */ -static int -nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *ir, *ik, *next; - struct nv_instruction *entry = b->phi ? b->phi : b->entry; - int s, d; - unsigned int reps; - - do { - reps = 0; - for (ir = entry; ir; ir = next) { - next = ir->next; - if (ir->fixed) - continue; - for (ik = entry; ik != ir; ik = ik->next) { - if (!is_operation_equal(ir, ik)) - continue; - if (!ir->def[0] || !ik->def[0]) - continue; - - if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) - continue; - - for (d = 0; d < 4; ++d) { - if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0)) - break; - if (ir->def[d]) { - if (!values_equal(ik->def[0], ir->def[0])) - break; - } else { - d = 4; - break; - } - } - if (d != 4) - continue; - - for (s = 0; s < 5; ++s) { - struct nv_value *a, *b; - - if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0)) - break; - if (!ir->src[s]) { - s = 5; - break; - } - - if (ik->src[s]->mod != ir->src[s]->mod) - break; - a = ik->src[s]->value; - b = ir->src[s]->value; - if (a == b) - continue; - if (a->reg.file != b->reg.file || - a->reg.id < 0 || /* this excludes memory loads/stores */ - a->reg.id != b->reg.id) - break; - } - if (s == 5) { - nvc0_insn_delete(ir); - for (d = 0; d < 4 && ir->def[d]; ++d) - nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]); - ++reps; - break; - } - } - } - } while(reps); - - DESCEND_ARBITRARY(s, nv_pass_cse); - - return 0; -} - -/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy - * neighbouring registers. CSE might have messed this up. - * Just generate a MOV for each source to avoid conflicts if they're used in - * multiple NV_OP_BIND at different positions. - * - * Add a dummy use of the pointer source of >= 8 byte loads after the load - * to prevent it from being assigned a register which overlaps the load's - * destination, which would produce random corruptions. - */ -static int -nv_pass_fixups(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_value *val; - struct nv_instruction *fix, *nvi, *next; - int s; - - for (fix = b->entry; fix; fix = next) { - next = fix->next; - - if (fix->opcode == NV_OP_LD) { - if (fix->indirect >= 0 && fix->src[0]->value->reg.size >= 8) { - nvi = nv_alloc_instruction(ctx->pc, NV_OP_UNDEF); - nv_reference(ctx->pc, nvi, 0, fix->src[fix->indirect]->value); - - nvc0_insn_insert_after(fix, nvi); - } - continue; - } else - if (fix->opcode == NV_OP_BIND) { - for (s = 0; s < 4 && fix->src[s]; ++s) { - val = fix->src[s]->value; - - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nv_reference(ctx->pc, fix, s, nvi->def[0]); - - nvc0_insn_insert_before(fix, nvi); - } - } - } - DESCEND_ARBITRARY(s, nv_pass_fixups); - - return 0; -} - -static int -nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) -{ - struct pass_reld_elim *reldelim; - struct nv_pass pass; - struct nv_pass_dce dce; - int ret; - - pass.n = 0; - pass.pc = pc; - - /* Do CSE so we can just compare values by pointer in subsequent passes. */ - pc->pass_seq++; - ret = nv_pass_cse(&pass, root); - if (ret) - return ret; - - /* Do this first, so we don't have to pay attention - * to whether sources are supported memory loads. - */ - pc->pass_seq++; - ret = nv_pass_algebraic_opt(&pass, root); - if (ret) - return ret; - - pc->pass_seq++; - ret = nv_pass_lower_mods(&pass, root); - if (ret) - return ret; - - pc->pass_seq++; - ret = nvc0_pass_fold_loads(&pass, root); - if (ret) - return ret; - - if (pc->opt_reload_elim) { - reldelim = CALLOC_STRUCT(pass_reld_elim); - reldelim->pc = pc; - - pc->pass_seq++; - ret = nv_pass_reload_elim(reldelim, root); - if (ret) { - FREE(reldelim); - return ret; - } - memset(reldelim, 0, sizeof(struct pass_reld_elim)); - reldelim->pc = pc; - } - - /* May run DCE before load-combining since that pass will clean up - * after itself. - */ - dce.pc = pc; - do { - dce.removed = 0; - pc->pass_seq++; - ret = nv_pass_dce(&dce, root); - if (ret) - return ret; - } while (dce.removed); - - if (pc->opt_reload_elim) { - pc->pass_seq++; - ret = nv_pass_mem_opt(reldelim, root); - if (!ret) { - memset(reldelim, 0, sizeof(struct pass_reld_elim)); - reldelim->pc = pc; - - pc->pass_seq++; - ret = nv_pass_mem_opt(reldelim, root); - } - FREE(reldelim); - if (ret) - return ret; - } - - ret = nv_pass_tex_mask(&pass, root); - if (ret) - return ret; - - pc->pass_seq++; - ret = nv_pass_fixups(&pass, root); - - return ret; -} - -int -nvc0_pc_exec_pass0(struct nv_pc *pc) -{ - int i, ret; - - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) - return ret; - return 0; -} diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c deleted file mode 100644 index 1f37cb802d7..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "nvc0_pc.h" - -#define PRINT(args...) debug_printf(args) - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) -#endif - -static const char *norm = "\x1b[00m"; -static const char *gree = "\x1b[32m"; -static const char *blue = "\x1b[34m"; -static const char *cyan = "\x1b[36m"; -static const char *yllw = "\x1b[33m"; -static const char *mgta = "\x1b[35m"; - -static const char *nv_cond_names[] = -{ - "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", - "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", - "o", "c", "a", "s" -}; - -static const char *nv_modifier_strings[] = -{ - "", - "neg", - "abs", - "neg abs", - "not", - "not neg" - "not abs", - "not neg abs", - "sat", - "BAD_MOD" -}; - -const char * -nvc0_opcode_name(uint opcode) -{ - return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; -} - -static INLINE const char * -nv_type_name(ubyte type, ubyte size) -{ - switch (type) { - case NV_TYPE_U16: return "u16"; - case NV_TYPE_S16: return "s16"; - case NV_TYPE_F32: return "f32"; - case NV_TYPE_U32: return "u32"; - case NV_TYPE_S32: return "s32"; - case NV_TYPE_P32: return "p32"; - case NV_TYPE_F64: return "f64"; - case NV_TYPE_ANY: - { - switch (size) { - case 1: return "b8"; - case 2: return "b16"; - case 4: return "b32"; - case 8: return "b64"; - case 12: return "b96"; - case 16: return "b128"; - default: - return "BAD_SIZE"; - } - } - default: - return "BAD_TYPE"; - } -} - -static INLINE const char * -nv_cond_name(ubyte cc) -{ - return nv_cond_names[MIN2(cc, 19)]; -} - -static INLINE const char * -nv_modifier_string(ubyte mod) -{ - return nv_modifier_strings[MIN2(mod, 9)]; -} - -static INLINE int -nv_value_id(struct nv_value *value) -{ - if (value->join->reg.id >= 0) - return value->join->reg.id; - return value->n; -} - -static INLINE boolean -nv_value_allocated(struct nv_value *value) -{ - return (value->reg.id >= 0) ? TRUE : FALSE; -} - -static INLINE void -nv_print_address(const char c, int buf, struct nv_value *a, int offset) -{ - const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; - char sg; - - if (offset < 0) { - sg = '-'; - offset = -offset; - } else { - sg = '+'; - } - - if (buf >= 0) - PRINT(" %s%c%i[", cyan, c, buf); - else - PRINT(" %s%c[", cyan, c); - if (a) - PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg); - PRINT("%s0x%x%s]", yllw, offset, cyan); -} - -static INLINE void -nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type) -{ - char reg_pfx = nv_value_allocated(value->join) ? '$' : '%'; - - if (value->reg.file != NV_FILE_PRED) - PRINT(" %s%s", gree, nv_type_name(type, value->reg.size)); - - switch (value->reg.file) { - case NV_FILE_GPR: - PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); - if (value->reg.size == 8) - PRINT("d"); - if (value->reg.size == 16) - PRINT("q"); - break; - case NV_FILE_PRED: - PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value)); - break; - case NV_FILE_COND: - PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); - break; - case NV_FILE_MEM_L: - nv_print_address('l', -1, indir, value->reg.address); - break; - case NV_FILE_MEM_G: - nv_print_address('g', -1, indir, value->reg.address); - break; - case NV_FILE_MEM_A: - nv_print_address('a', -1, indir, value->reg.address); - break; - case NV_FILE_MEM_V: - nv_print_address('v', -1, indir, value->reg.address); - break; - case NV_FILE_IMM: - switch (type) { - case NV_TYPE_U16: - case NV_TYPE_S16: - PRINT(" %s0x%04x", yllw, value->reg.imm.u32); - break; - case NV_TYPE_F32: - PRINT(" %s%f", yllw, value->reg.imm.f32); - break; - case NV_TYPE_F64: - PRINT(" %s%f", yllw, value->reg.imm.f64); - break; - case NV_TYPE_U32: - case NV_TYPE_S32: - case NV_TYPE_P32: - case NV_TYPE_ANY: - PRINT(" %s0x%08x", yllw, value->reg.imm.u32); - break; - } - break; - default: - if (value->reg.file >= NV_FILE_MEM_C(0) && - value->reg.file <= NV_FILE_MEM_C(15)) - nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir, - value->reg.address); - else - NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value)); - break; - } -} - -static INLINE void -nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type) -{ - nv_print_value(ref->value, indir, type); -} - -void -nvc0_print_instruction(struct nv_instruction *i) -{ - int s; - - PRINT("%i: ", i->serial); - - if (i->predicate >= 0) { - PRINT("%s%s", gree, i->cc ? "fl" : "tr"); - nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8); - PRINT(" "); - } - - PRINT("%s", gree); - if (NV_BASEOP(i->opcode) == NV_OP_SET) - PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond)); - else - if (i->saturate) - PRINT("sat %s", nvc0_opcode_name(i->opcode)); - else - PRINT("%s", nvc0_opcode_name(i->opcode)); - - if (i->opcode == NV_OP_CVT) - nv_print_value(i->def[0], NULL, i->ext.cvt.d); - else - if (i->def[0]) - nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode)); - else - if (i->target) - PRINT(" %s(BB:%i)", yllw, i->target->id); - else - PRINT(" #"); - - for (s = 1; s < 4 && i->def[s]; ++s) - nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode)); - if (s > 1) - PRINT("%s ,", norm); - - for (s = 0; s < 6 && i->src[s]; ++s) { - ubyte type; - if (s == i->indirect || s == i->predicate) - continue; - if (i->opcode == NV_OP_CVT) - type = i->ext.cvt.s; - else - type = NV_OPTYPE(i->opcode); - - if (i->src[s]->mod) - PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod)); - - if (i->indirect >= 0 && - NV_IS_MEMORY_FILE(i->src[s]->value->reg.file)) - nv_print_ref(i->src[s], i->src[i->indirect]->value, type); - else - nv_print_ref(i->src[s], NULL, type); - } - PRINT(" %s\n", norm); -} - -#define NV_MOD_SGN_12 ((NV_MOD_ABS | NV_MOD_NEG) | ((NV_MOD_ABS | NV_MOD_NEG) << 4)) -#define NV_MOD_NEG_123 (NV_MOD_NEG | (NV_MOD_NEG << 4) | (NV_MOD_NEG << 8)) -#define NV_MOD_NEG_3 (NV_MOD_NEG << 8) - -#define NV_MOD_SGN NV_MOD_SGN_12 - -struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = -{ - { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, - { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, - { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, - { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, - { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, - { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, - { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 }, - { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 }, - { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, - { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, - { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, - - { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, - { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 }, - { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, - { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, - { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, - { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, - { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 }, - - { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, - { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 }, - { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, - { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - - { NV_OP_SLCT, "slct", NV_TYPE_F32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SLCT, "slct", NV_TYPE_S32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SLCT, "slct", NV_TYPE_U32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, - - { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - - { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, - - { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, - - { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 } -}; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c deleted file mode 100644 index 15bebb2134a..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ /dev/null @@ -1,1053 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if NV50_DEBUG & NV50_DEBUG_PROG_RA -# define NVC0_RA_DEBUG_LIVEI -# define NVC0_RA_DEBUG_LIVE_SETS -# define NVC0_RA_DEBUG_JOIN -#endif - -#include "nvc0_pc.h" -#include "util/u_simple_list.h" - -#define NVC0_NUM_REGISTER_FILES 3 - -/* @unit_shift: log2 of min allocation unit for register */ -struct register_set { - uint32_t bits[NVC0_NUM_REGISTER_FILES][2]; - uint32_t last[NVC0_NUM_REGISTER_FILES]; - int log2_unit[NVC0_NUM_REGISTER_FILES]; - struct nv_pc *pc; -}; - -/* aliasing is allowed */ -static void -intersect_register_sets(struct register_set *dst, - struct register_set *src1, struct register_set *src2) -{ - int i; - - for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { - dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0]; - dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1]; - } -} - -static void -mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) -{ - int i; - - for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { - set->bits[i][0] = (set->bits[i][0] | mask) & umask; - set->bits[i][1] = (set->bits[i][1] | mask) & umask; - } -} - -struct nv_pc_pass { - struct nv_pc *pc; - struct nv_instruction **insns; - uint num_insns; - uint pass_seq; -}; - -static void -ranges_coalesce(struct nv_range *range) -{ - while (range->next && range->end >= range->next->bgn) { - struct nv_range *rnn = range->next->next; - assert(range->bgn <= range->next->bgn); - range->end = MAX2(range->end, range->next->end); - FREE(range->next); - range->next = rnn; - } -} - -static boolean -add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) -{ - struct nv_range *range, **nextp = &val->livei; - - if (bgn == end) /* [a, a) is invalid / empty */ - return TRUE; - - for (range = val->livei; range; range = range->next) { - if (end < range->bgn) - break; /* insert before */ - - if (bgn > range->end) { - nextp = &range->next; - continue; /* insert after */ - } - - /* overlap */ - if (bgn < range->bgn) { - range->bgn = bgn; - if (end > range->end) - range->end = end; - ranges_coalesce(range); - return TRUE; - } - if (end > range->end) { - range->end = end; - ranges_coalesce(range); - return TRUE; - } - assert(bgn >= range->bgn); - assert(end <= range->end); - return TRUE; - } - - if (!new_range) - new_range = CALLOC_STRUCT(nv_range); - - new_range->bgn = bgn; - new_range->end = end; - new_range->next = range; - *(nextp) = new_range; - return FALSE; -} - -static void -add_range(struct nv_value *val, struct nv_basic_block *b, int end) -{ - int bgn; - - if (!val->insn) /* ignore non-def values */ - return; - assert(b->entry->serial <= b->exit->serial); - assert(b->phi->serial <= end); - assert(b->exit->serial + 1 >= end); - - bgn = val->insn->serial; - if (bgn < b->entry->serial || bgn > b->exit->serial) - bgn = b->entry->serial; - - assert(bgn <= end); - - add_range_ex(val, bgn, end, NULL); -} - -#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI) -static void -livei_print(struct nv_value *a) -{ - struct nv_range *r = a->livei; - - debug_printf("livei %i: ", a->n); - while (r) { - debug_printf("[%i, %i) ", r->bgn, r->end); - r = r->next; - } - debug_printf("\n"); -} -#endif - -static void -livei_unify(struct nv_value *dst, struct nv_value *src) -{ - struct nv_range *range, *next; - - for (range = src->livei; range; range = next) { - next = range->next; - if (add_range_ex(dst, range->bgn, range->end, range)) - FREE(range); - } - src->livei = NULL; -} - -static void -livei_release(struct nv_value *val) -{ - struct nv_range *range, *next; - - for (range = val->livei; range; range = next) { - next = range->next; - FREE(range); - } -} - -static boolean -livei_have_overlap(struct nv_value *a, struct nv_value *b) -{ - struct nv_range *r_a, *r_b; - - for (r_a = a->livei; r_a; r_a = r_a->next) { - for (r_b = b->livei; r_b; r_b = r_b->next) { - if (r_b->bgn < r_a->end && - r_b->end > r_a->bgn) - return TRUE; - } - } - return FALSE; -} - -static int -livei_end(struct nv_value *a) -{ - struct nv_range *r = a->livei; - - assert(r); - while (r->next) - r = r->next; - return r->end; -} - -static boolean -livei_contains(struct nv_value *a, int pos) -{ - struct nv_range *r; - - for (r = a->livei; r && r->bgn <= pos; r = r->next) - if (r->end > pos) - return TRUE; - return FALSE; -} - -static boolean -reg_assign(struct register_set *set, struct nv_value **def, int n) -{ - int i, id, s, k; - uint32_t m; - int f = def[0]->reg.file; - - k = n; - if (k == 3) - k = 4; - s = (k * def[0]->reg.size) >> set->log2_unit[f]; - m = (1 << s) - 1; - - id = set->last[f]; - - for (i = 0; i * 32 < set->last[f]; ++i) { - if (set->bits[f][i] == 0xffffffff) - continue; - - for (id = 0; id < 32; id += s) - if (!(set->bits[f][i] & (m << id))) - break; - if (id < 32) - break; - } - if (i * 32 + id > set->last[f]) - return FALSE; - - set->bits[f][i] |= m << id; - - id += i * 32; - - set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1); - - for (i = 0; i < n; ++i) - if (def[i]->livei) - def[i]->reg.id = id++; - - return TRUE; -} - -static INLINE void -reg_occupy(struct register_set *set, struct nv_value *val) -{ - int id = val->reg.id, f = val->reg.file; - uint32_t m; - - if (id < 0) - return; - m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; - - set->bits[f][id / 32] |= m << (id % 32); - - if (set->pc->max_reg[f] < id) - set->pc->max_reg[f] = id; -} - -static INLINE void -reg_release(struct register_set *set, struct nv_value *val) -{ - int id = val->reg.id, f = val->reg.file; - uint32_t m; - - if (id < 0) - return; - m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; - - set->bits[f][id / 32] &= ~(m << (id % 32)); -} - -static INLINE boolean -join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) -{ - int i; - struct nv_value *val; - - if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) - return FALSE; - - if (a->join->reg.id == b->join->reg.id) - return TRUE; - - /* either a or b or both have been assigned */ - - if (a->join->reg.id >= 0 && b->join->reg.id >= 0) - return FALSE; - else - if (b->join->reg.id >= 0) { - if (b->join->reg.id == 63) - return FALSE; - val = a; - a = b; - b = val; - } else - if (a->join->reg.id == 63) - return FALSE; - - for (i = 0; i < ctx->pc->num_values; ++i) { - val = &ctx->pc->values[i]; - - if (val->join->reg.id != a->join->reg.id) - continue; - if (val->join != a->join && livei_have_overlap(val->join, b->join)) - return FALSE; - } - return TRUE; -} - -static INLINE void -do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) -{ - int j; - struct nv_value *bjoin = b->join; - - if (b->join->reg.id >= 0) - a->join->reg.id = b->join->reg.id; - - livei_unify(a->join, b->join); - -#ifdef NVC0_RA_DEBUG_JOIN - debug_printf("joining %i to %i\n", b->n, a->n); -#endif - - /* make a->join the new representative */ - for (j = 0; j < ctx->pc->num_values; ++j) - if (ctx->pc->values[j].join == bjoin) - ctx->pc->values[j].join = a->join; - - assert(b->join == a->join); -} - -static INLINE boolean -try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) -{ - if (!join_allowed(ctx, a, b)) { -#ifdef NVC0_RA_DEBUG_JOIN - debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); -#endif - return FALSE; - } - if (livei_have_overlap(a->join, b->join)) { -#ifdef NVC0_RA_DEBUG_JOIN - debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n); - livei_print(a); - livei_print(b); -#endif - return FALSE; - } - - do_join_values(ctx, a, b); - - return TRUE; -} - -static void -join_values_nofail(struct nv_pc_pass *ctx, - struct nv_value *a, struct nv_value *b, boolean type_only) -{ - if (type_only) { - assert(join_allowed(ctx, a, b)); - do_join_values(ctx, a, b); - } else { - boolean ok = try_join_values(ctx, a, b); - if (!ok) { - NOUVEAU_ERR("failed to coalesce values\n"); - } - } -} - -static INLINE boolean -need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) -{ - int i = 0, n = 0; - - for (; i < 2; ++i) - if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i])) - ++n; - - return (b->num_in > 1) && (n == 2); -} - -/* Look for the @phi's operand whose definition reaches @b. */ -static int -phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, - struct nv_basic_block *tb) -{ - struct nv_ref *srci, *srcj; - int i, j; - - for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { - srci = phi->src[i]; - /* if already replaced, check with original source first */ - if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) - srci = srci->value->insn->src[0]; - if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL)) - continue; - /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb, - srci->value->insn->bb, NULL)) { - j = i; - srcj = srci; - } - } - if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL)) - if (!nvc0_bblock_reachable_by(srcj->value->insn->bb, - phi->def[0]->insn->bb, NULL)) - j = -1; - return j; -} - -/* For each operand of each PHI in b, generate a new value by inserting a MOV - * at the end of the block it is coming from and replace the operand with its - * result. This eliminates liveness conflicts and enables us to let values be - * copied to the right register if such a conflict exists nonetheless. - * - * These MOVs are also crucial in making sure the live intervals of phi srces - * are extended until the end of the loop, since they are not included in the - * live-in sets. - */ -static int -pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *i, *ni; - struct nv_value *val; - struct nv_basic_block *p, *pn; - int n, j; - - b->pass_seq = ctx->pc->pass_seq; - - for (n = 0; n < b->num_in; ++n) { - p = pn = b->in[n]; - assert(p); - - if (need_new_else_block(b, p)) { - pn = new_basic_block(ctx->pc); - - if (p->out[0] == b) - p->out[0] = pn; - else - p->out[1] = pn; - - if (p->exit->target == b) /* target to new else-block */ - p->exit->target = pn; - - b->in[n] = pn; - - pn->out[0] = b; - pn->in[0] = p; - pn->num_in = 1; - } - ctx->pc->current_block = pn; - - for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - j = phi_opnd_for_bb(i, p, b); - - if (j < 0) { - val = i->def[0]; - } else { - val = i->src[j]->value; - if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { - j = -1; - /* use original value, we already encountered & replaced it */ - val = val->insn->src[0]->value; - } - } - if (j < 0) /* need an additional source ? */ - for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j); - assert(j < 6); /* XXX: really ugly shaders */ - - ni = new_instruction(ctx->pc, NV_OP_MOV); - if (ni->prev && ni->prev->target) - nvc0_insns_permute(ni->prev, ni); - - ni->def[0] = new_value_like(ctx->pc, val); - ni->def[0]->insn = ni; - nv_reference(ctx->pc, ni, 0, val); - nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ - i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; - } - - if (pn != p && pn->exit) { - assert(!b->in[!n]->exit || b->in[!n]->exit->terminator); - /* insert terminator (branch to ENDIF) in new else block */ - ctx->pc->current_block = pn; - ni = new_instruction(ctx->pc, NV_OP_BRA); - ni->target = b; - ni->terminator = 1; - } - } - - for (j = 0; j < 2; ++j) - if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) - pass_generate_phi_movs(ctx, b->out[j]); - - return 0; -} - -#define JOIN_MASK_PHI (1 << 0) -#define JOIN_MASK_SELECT (1 << 1) -#define JOIN_MASK_MOV (1 << 2) -#define JOIN_MASK_BIND (1 << 3) - -static int -pass_join_values(struct nv_pc_pass *ctx, unsigned mask) -{ - int c, n; - - for (n = 0; n < ctx->num_insns; ++n) { - struct nv_instruction *i = ctx->insns[n]; - - switch (i->opcode) { - case NV_OP_PHI: - if (!(mask & JOIN_MASK_PHI)) - break; - for (c = 0; c < 6 && i->src[c]; ++c) - join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); - break; - case NV_OP_MOV: - if (!(mask & JOIN_MASK_MOV)) - break; - if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1]) - try_join_values(ctx, i->def[0], i->src[0]->value); - break; - case NV_OP_SELECT: - if (!(mask & JOIN_MASK_SELECT)) - break; - for (c = 0; c < 6 && i->src[c]; ++c) - join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); - break; - case NV_OP_BIND: - if (!(mask & JOIN_MASK_BIND)) - break; - for (c = 0; c < 4 && i->src[c]; ++c) - join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ - default: - break; - } - } - return 0; -} - -/* Order the instructions so that live intervals can be expressed in numbers. */ -static void -pass_order_instructions(void *priv, struct nv_basic_block *b) -{ - struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv; - struct nv_instruction *i; - - b->pass_seq = ctx->pc->pass_seq; - - assert(!b->exit || !b->exit->next); - for (i = b->phi; i; i = i->next) { - i->serial = ctx->num_insns; - ctx->insns[ctx->num_insns++] = i; - } -} - -static void -bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b) -{ -#ifdef NVC0_RA_DEBUG_LIVE_SETS - struct nv_value *val; - int j; - - debug_printf("LIVE-INs of BB:%i: ", b->id); - - for (j = 0; j < pc->num_values; ++j) { - if (!(b->live_set[j / 32] & (1 << (j % 32)))) - continue; - val = &pc->values[j]; - if (!val->insn) - continue; - debug_printf("%i ", val->n); - } - debug_printf("\n"); -#endif -} - -static INLINE void -live_set_add(struct nv_basic_block *b, struct nv_value *val) -{ - if (!val->insn) /* don't add non-def values */ - return; - b->live_set[val->n / 32] |= 1 << (val->n % 32); -} - -static INLINE void -live_set_rem(struct nv_basic_block *b, struct nv_value *val) -{ - b->live_set[val->n / 32] &= ~(1 << (val->n % 32)); -} - -static INLINE boolean -live_set_test(struct nv_basic_block *b, struct nv_ref *ref) -{ - int n = ref->value->n; - return b->live_set[n / 32] & (1 << (n % 32)); -} - -/* The live set of a block contains those values that are live immediately - * before the beginning of the block, so do a backwards scan. - */ -static int -pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *i; - int j, n, ret = 0; - - if (b->pass_seq >= ctx->pc->pass_seq) - return 0; - b->pass_seq = ctx->pc->pass_seq; - - /* slight hack for undecidedness: set phi = entry if it's undefined */ - if (!b->phi) - b->phi = b->entry; - - for (n = 0; n < 2; ++n) { - if (!b->out[n] || b->out[n] == b) - continue; - ret = pass_build_live_sets(ctx, b->out[n]); - if (ret) - return ret; - - if (n == 0) { - for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) - b->live_set[j] = b->out[n]->live_set[j]; - } else { - for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) - b->live_set[j] |= b->out[n]->live_set[j]; - } - } - - if (!b->entry) - return 0; - - bb_live_set_print(ctx->pc, b); - - for (i = b->exit; i != b->entry->prev; i = i->prev) { - for (j = 0; j < 5 && i->def[j]; j++) - live_set_rem(b, i->def[j]); - for (j = 0; j < 6 && i->src[j]; j++) - live_set_add(b, i->src[j]->value); - } - for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) - live_set_rem(b, i->def[0]); - - bb_live_set_print(ctx->pc, b); - - return 0; -} - -static void collect_live_values(struct nv_basic_block *b, const int n) -{ - int i; - - /* XXX: what to do about back/fake-edges (used to include both here) ? */ - if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { - if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { - for (i = 0; i < n; ++i) - b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; - } else { - memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); - } - } else - if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { - memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); - } else { - memset(b->live_set, 0, n * sizeof(uint32_t)); - } -} - -/* NOTE: the live intervals of phi functions start at the first non-phi insn. */ -static int -pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *i, *i_stop; - int j, s; - const int n = (ctx->pc->num_values + 31) / 32; - - /* verify that first block does not have live-in values */ - if (b->num_in == 0) - for (j = 0; j < n; ++j) - assert(b->live_set[j] == 0); - - collect_live_values(b, n); - - /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */ - for (j = 0; j < 2; ++j) { - if (!b->out[j] || !b->out[j]->phi) - continue; - for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) { - live_set_rem(b, i->def[0]); - - for (s = 0; s < 6 && i->src[s]; ++s) { - assert(i->src[s]->value->insn); - if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb, - b->out[j])) - live_set_add(b, i->src[s]->value); - else - live_set_rem(b, i->src[s]->value); - } - } - } - - /* remaining live-outs are live until the end */ - if (b->exit) { - for (j = 0; j < ctx->pc->num_values; ++j) { - if (!(b->live_set[j / 32] & (1 << (j % 32)))) - continue; - add_range(&ctx->pc->values[j], b, b->exit->serial + 1); -#ifdef NVC0_RA_DEBUG_LIVEI - debug_printf("adding range for live value %i: ", j); - livei_print(&ctx->pc->values[j]); -#endif - } - } - - i_stop = b->entry ? b->entry->prev : NULL; - - /* don't have to include phi functions here (will have 0 live range) */ - for (i = b->exit; i != i_stop; i = i->prev) { - assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial); - for (j = 0; j < 4 && i->def[j]; ++j) - live_set_rem(b, i->def[j]); - - for (j = 0; j < 6 && i->src[j]; ++j) { - if (!live_set_test(b, i->src[j])) { - live_set_add(b, i->src[j]->value); - add_range(i->src[j]->value, b, i->serial); -#ifdef NVC0_RA_DEBUG_LIVEI - debug_printf("adding range for source %i (ends living): ", - i->src[j]->value->n); - livei_print(i->src[j]->value); -#endif - } - } - } - - b->pass_seq = ctx->pc->pass_seq; - - if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) - pass_build_intervals(ctx, b->out[0]); - - if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) - pass_build_intervals(ctx, b->out[1]); - - return 0; -} - -static INLINE void -nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) -{ - memset(set, 0, sizeof(*set)); - - set->last[NV_FILE_GPR] = 62; - set->last[NV_FILE_PRED] = 6; - set->last[NV_FILE_COND] = 1; - - set->log2_unit[NV_FILE_GPR] = 2; - set->log2_unit[NV_FILE_COND] = 0; - set->log2_unit[NV_FILE_PRED] = 0; - - set->pc = pc; -} - -static void -insert_ordered_tail(struct nv_value *list, struct nv_value *nval) -{ - struct nv_value *elem; - - for (elem = list->prev; - elem != list && elem->livei->bgn > nval->livei->bgn; - elem = elem->prev); - /* now elem begins before or at the same time as val */ - - nval->prev = elem; - nval->next = elem->next; - elem->next->prev = nval; - elem->next = nval; -} - -static void -collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, - boolean assigned_only) -{ - struct nv_value *val; - int k, n; - - make_empty_list(head); - - for (n = 0; n < ctx->num_insns; ++n) { - struct nv_instruction *i = ctx->insns[n]; - - /* for joined values, only the representative will have livei != NULL */ - for (k = 0; k < 5; ++k) { - if (i->def[k] && i->def[k]->livei) - if (!assigned_only || i->def[k]->reg.id >= 0) - insert_ordered_tail(head, i->def[k]); - } - } - - for (val = head->next; val != head->prev; val = val->next) { - assert(val->join == val); - assert(val->livei->bgn <= val->next->livei->bgn); - } -} - -static int -pass_linear_scan(struct nv_pc_pass *ctx) -{ - struct register_set f, free; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - - nvc0_ctor_register_set(ctx->pc, &free); - - collect_register_values(ctx, &unhandled, FALSE); - - foreach_s(cur, tmp[0], &unhandled) { - remove_from_list(cur); - - foreach_s(val, tmp[1], &active) { - if (livei_end(val) <= cur->livei->bgn) { - reg_release(&free, val); - move_to_head(&handled, val); - } else - if (!livei_contains(val, cur->livei->bgn)) { - reg_release(&free, val); - move_to_head(&inactive, val); - } - } - - foreach_s(val, tmp[1], &inactive) { - if (livei_end(val) <= cur->livei->bgn) - move_to_head(&handled, val); - else - if (livei_contains(val, cur->livei->bgn)) { - reg_occupy(&free, val); - move_to_head(&active, val); - } - } - - f = free; - - foreach(val, &inactive) - if (livei_have_overlap(val, cur)) - reg_occupy(&f, val); - - foreach(val, &unhandled) - if (val->reg.id >= 0 && livei_have_overlap(val, cur)) - reg_occupy(&f, val); - - if (cur->reg.id < 0) { - boolean mem = !reg_assign(&f, &cur, 1); - - if (mem) { - NOUVEAU_ERR("out of registers\n"); - abort(); - } - } - insert_at_head(&active, cur); - reg_occupy(&free, cur); - } - - return 0; -} - -/* Allocate values defined by instructions such as TEX, which have to be - * assigned to consecutive registers. - * Linear scan doesn't really work here since the values can have different - * live intervals. - */ -static int -pass_allocate_constrained_values(struct nv_pc_pass *ctx) -{ - struct nv_value regvals, *val; - struct nv_instruction *i; - struct nv_value *defs[4]; - struct register_set regs[4]; - int n, vsize, c; - uint32_t mask; - boolean mem; - - collect_register_values(ctx, ®vals, TRUE); - - for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; - vsize = nvi_vector_size(i); - if (!(vsize > 1)) - continue; - assert(vsize <= 4); - - for (c = 0; c < vsize; ++c) - defs[c] = i->def[c]->join; - - if (defs[0]->reg.id >= 0) { - for (c = 1; c < vsize; ++c) - assert(defs[c]->reg.id >= 0); - continue; - } - - for (c = 0; c < vsize; ++c) { - nvc0_ctor_register_set(ctx->pc, ®s[c]); - - foreach(val, ®vals) { - if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) - reg_occupy(®s[c], val); - } - mask = 0x11111111; - if (vsize == 2) /* granularity is 2 and not 4 */ - mask |= 0x11111111 << 2; - mask_register_set(®s[c], 0, mask << c); - - if (defs[c]->livei) - insert_ordered_tail(®vals, defs[c]); - } - for (c = 1; c < vsize; ++c) - intersect_register_sets(®s[0], ®s[0], ®s[c]); - - mem = !reg_assign(®s[0], &defs[0], vsize); - - if (mem) { - NOUVEAU_ERR("out of registers\n"); - abort(); - } - } - return 0; -} - -static int -nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) -{ - struct nv_pc_pass *ctx; - int i, ret; - - NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n"); - - ctx = CALLOC_STRUCT(nv_pc_pass); - if (!ctx) - return -1; - ctx->pc = pc; - - ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *)); - if (!ctx->insns) { - FREE(ctx); - return -1; - } - - pc->pass_seq++; - ret = pass_generate_phi_movs(ctx, root); - assert(!ret); - -#ifdef NVC0_RA_DEBUG_LIVEI - nvc0_print_function(root); -#endif - - for (i = 0; i < pc->loop_nesting_bound; ++i) { - pc->pass_seq++; - ret = pass_build_live_sets(ctx, root); - assert(!ret && "live sets"); - if (ret) { - NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i); - goto out; - } - } - - pc->pass_seq++; - nvc0_pc_pass_in_order(root, pass_order_instructions, ctx); - - pc->pass_seq++; - ret = pass_build_intervals(ctx, root); - assert(!ret && "build intervals"); - if (ret) { - NOUVEAU_ERR("failed to build live intervals\n"); - goto out; - } - -#ifdef NVC0_RA_DEBUG_LIVEI - for (i = 0; i < pc->num_values; ++i) - livei_print(&pc->values[i]); -#endif - - ret = pass_join_values(ctx, JOIN_MASK_PHI); - if (ret) - goto out; - ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND); - if (ret) - goto out; - ret = pass_join_values(ctx, JOIN_MASK_MOV); - if (ret) - goto out; - ret = pass_allocate_constrained_values(ctx); - if (ret) - goto out; - ret = pass_linear_scan(ctx); - if (ret) - goto out; - - for (i = 0; i < pc->num_values; ++i) - livei_release(&pc->values[i]); - - NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n"); - -out: - FREE(ctx->insns); - FREE(ctx); - return ret; -} - -int -nvc0_pc_exec_pass1(struct nv_pc *pc) -{ - int i, ret; - - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i]))) - return ret; - return 0; -} diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c deleted file mode 100644 index d57be916696..00000000000 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ /dev/null @@ -1,2026 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <unistd.h> - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_dump.h" -#include "util/u_dynarray.h" - -#include "nvc0_pc.h" -#include "nvc0_program.h" - -/* Arbitrary internal limits. */ -#define BLD_MAX_TEMPS 64 -#define BLD_MAX_ADDRS 4 -#define BLD_MAX_PREDS 4 -#define BLD_MAX_IMMDS 128 -#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS - -#define BLD_MAX_COND_NESTING 8 -#define BLD_MAX_LOOP_NESTING 4 -#define BLD_MAX_CALL_NESTING 2 - -/* This structure represents a TGSI register. */ -struct bld_register { - struct nv_value *current; - /* collect all SSA values assigned to it */ - struct util_dynarray vals; - /* 1 bit per loop level, indicates if used/defd, reset when loop ends */ - uint16_t loop_use; - uint16_t loop_def; -}; - -static INLINE struct nv_value ** -bld_register_access(struct bld_register *reg, unsigned i) -{ - return util_dynarray_element(®->vals, struct nv_value *, i); -} - -static INLINE void -bld_register_add_val(struct bld_register *reg, struct nv_value *val) -{ - struct nv_basic_block *bb = val->insn->bb; - - if (reg->vals.size && - (util_dynarray_top(®->vals, struct nv_value *))->insn->bb == bb) - *(util_dynarray_top_ptr(®->vals, struct nv_value *)) = val; - else - util_dynarray_append(®->vals, struct nv_value *, val); -} - -static INLINE boolean -bld_register_del_val(struct bld_register *reg, struct nv_value *val) -{ - unsigned i; - - for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i) - if (*bld_register_access(reg, i - 1) == val) - break; - if (!i) - return FALSE; - - if (i != reg->vals.size / sizeof(struct nv_value *)) - *bld_register_access(reg, i - 1) = util_dynarray_pop(®->vals, - struct nv_value *); - else - reg->vals.size -= sizeof(struct nv_value *); - - return TRUE; -} - -struct bld_context { - struct nvc0_translation_info *ti; - - struct nv_pc *pc; - struct nv_basic_block *b; - - struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING]; - int call_lvl; - - struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING]; - struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING]; - struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; - int cond_lvl; - struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; - struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; - int loop_lvl; - - ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ - - struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ - struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */ - struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ - struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */ - - uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; - int hpos_index; - - struct nv_value *zero; - struct nv_value *frag_coord[4]; - - /* wipe on new BB */ - struct nv_value *saved_sysvals[4]; - struct nv_value *saved_addr[4][2]; - struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4]; - struct nv_value *saved_immd[BLD_MAX_IMMDS]; - uint num_immds; -}; - -static INLINE ubyte -bld_register_file(struct bld_context *bld, struct bld_register *reg) -{ - if (reg >= &bld->pvs[0][0] && - reg < &bld->ovs[0][0]) - return NV_FILE_PRED; - return NV_FILE_GPR; -} - -static INLINE struct nv_value * -bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c) -{ - regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl; - return regs[i * 4 + c].current; -} - -static struct nv_value * -bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *); - -/* If a variable is defined in a loop without prior use, we don't need - * a phi in the loop header to account for backwards flow. - * - * However, if this variable is then also used outside the loop, we do - * need a phi after all. But we must not use this phi's def inside the - * loop, so we can eliminate the phi if it is unused later. - */ -static INLINE void -bld_store(struct bld_context *bld, - struct bld_register *regs, int i, int c, struct nv_value *val) -{ - const uint16_t m = 1 << bld->loop_lvl; - struct bld_register *reg = ®s[i * 4 + c]; - - if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use))) - bld_loop_phi(bld, reg, val); - - reg->current = val; - bld_register_add_val(reg, reg->current); - - reg->loop_def |= 1 << bld->loop_lvl; -} - -#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) -#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) -#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c) -#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) -#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c) -#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) -#define STORE_OUTP(i, c, v) \ - do { \ - bld_store(bld, &bld->ovs[0][0], i, c, (v)); \ - bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ - } while (0) - -static INLINE void -bld_clear_def_use(struct bld_register *regs, int n, int lvl) -{ - int i; - const uint16_t mask = ~(1 << lvl); - - for (i = 0; i < n * 4; ++i) { - regs[i].loop_def &= mask; - regs[i].loop_use &= mask; - } -} - -static INLINE void -bld_warn_uninitialized(struct bld_context *bld, int kind, - struct bld_register *reg, struct nv_basic_block *b) -{ -#if NV50_DEBUG & NV50_DEBUG_SHADER - long i = (reg - &bld->tvs[0][0]) / 4; - long c = (reg - &bld->tvs[0][0]) & 3; - - if (c == 3) - c = -1; - debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", - i, (int)('x' + c), kind ? "may be" : "is", b->id); -#endif -} - -static INLINE struct nv_value * -bld_def(struct nv_instruction *i, int c, struct nv_value *value) -{ - i->def[c] = value; - value->insn = i; - return value; -} - -static INLINE struct nv_value * -find_by_bb(struct bld_register *reg, struct nv_basic_block *b) -{ - int i; - - if (reg->current && reg->current->insn->bb == b) - return reg->current; - - for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i) - if ((*bld_register_access(reg, i))->insn->bb == b) - return *bld_register_access(reg, i); - return NULL; -} - -/* Fetch value from register that was defined in the specified BB, - * or search for first definitions in all of its predecessors. - */ -static void -fetch_by_bb(struct bld_register *reg, - struct nv_value **vals, int *n, - struct nv_basic_block *b) -{ - int i; - struct nv_value *val; - - assert(*n < 16); /* MAX_COND_NESTING */ - - val = find_by_bb(reg, b); - if (val) { - for (i = 0; i < *n; ++i) - if (vals[i] == val) - return; - vals[(*n)++] = val; - return; - } - for (i = 0; i < b->num_in; ++i) - if (!IS_WALL_EDGE(b->in_kind[i])) - fetch_by_bb(reg, vals, n, b->in[i]); -} - -static INLINE boolean -nvc0_bblock_is_terminated(struct nv_basic_block *bb) -{ - return bb->exit && bb->exit->terminator; -} - -static INLINE struct nv_value * -bld_load_imm_u32(struct bld_context *bld, uint32_t u); - -static INLINE struct nv_value * -bld_undef(struct bld_context *bld, ubyte file) -{ - struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF); - - return bld_def(nvi, 0, new_value(bld->pc, file, 4)); -} - -static struct nv_value * -bld_phi(struct bld_context *bld, struct nv_basic_block *b, - struct bld_register *reg) -{ - struct nv_basic_block *in; - struct nv_value *vals[16] = { NULL }; - struct nv_value *val; - struct nv_instruction *phi; - int i, j, n; - - do { - i = n = 0; - fetch_by_bb(reg, vals, &n, b); - - if (!n) { - bld_warn_uninitialized(bld, 0, reg, b); - return NULL; - } - - if (n == 1) { - if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb)) - break; - - bld_warn_uninitialized(bld, 1, reg, b); - - /* back-tracking to insert missing value of other path */ - in = b; - while (in->in[0]) { - if (in->num_in == 1) { - in = in->in[0]; - } else { - if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b)) - in = in->in[0]; - else - if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b)) - in = in->in[1]; - else - in = in->in[0]; - } - } - bld->pc->current_block = in; - - /* should make this a no-op */ - bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file)); - continue; - } - - for (i = 0; i < n; ++i) { - /* if value dominates b, continue to the redefinitions */ - if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb)) - continue; - - /* if value dominates any in-block, b should be the dom frontier */ - for (j = 0; j < b->num_in; ++j) - if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb)) - break; - /* otherwise, find the dominance frontier and put the phi there */ - if (j == b->num_in) { - in = nvc0_bblock_dom_frontier(vals[i]->insn->bb); - val = bld_phi(bld, in, reg); - bld_register_add_val(reg, val); - break; - } - } - } while(i < n); - - bld->pc->current_block = b; - - if (n == 1) - return vals[0]; - - phi = new_instruction(bld->pc, NV_OP_PHI); - - bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size)); - for (i = 0; i < n; ++i) - nv_reference(bld->pc, phi, i, vals[i]); - - return phi->def[0]; -} - -/* Insert a phi function in the loop header. - * For nested loops, we need to insert phi functions in all the outer - * loop headers if they don't have one yet. - * - * @def: redefinition from inside loop, or NULL if to be replaced later - */ -static struct nv_value * -bld_loop_phi(struct bld_context *bld, struct bld_register *reg, - struct nv_value *def) -{ - struct nv_instruction *phi; - struct nv_basic_block *bb = bld->pc->current_block; - struct nv_value *val = NULL; - - if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */ - return NULL; - - if (bld->loop_lvl > 1) { - --bld->loop_lvl; - if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) - val = bld_loop_phi(bld, reg, NULL); - ++bld->loop_lvl; - } - - if (!val) - val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */ - if (!val) { - bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; - val = bld_undef(bld, bld_register_file(bld, reg)); - } - - bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; - - phi = new_instruction(bld->pc, NV_OP_PHI); - - bld_def(phi, 0, new_value_like(bld->pc, val)); - if (!def) - def = phi->def[0]; - - bld_register_add_val(reg, phi->def[0]); - - phi->target = (struct nv_basic_block *)reg; /* cheat */ - - nv_reference(bld->pc, phi, 0, val); - nv_reference(bld->pc, phi, 1, def); - - bld->pc->current_block = bb; - - return phi->def[0]; -} - -static INLINE struct nv_value * -bld_fetch_global(struct bld_context *bld, struct bld_register *reg) -{ - const uint16_t m = 1 << bld->loop_lvl; - const uint16_t use = reg->loop_use; - - reg->loop_use |= m; - - /* If neither used nor def'd inside the loop, build a phi in foresight, - * so we don't have to replace stuff later on, which requires tracking. - */ - if (bld->loop_lvl && !((use | reg->loop_def) & m)) - return bld_loop_phi(bld, reg, NULL); - - return bld_phi(bld, bld->pc->current_block, reg); -} - -static INLINE struct nv_value * -bld_imm_u32(struct bld_context *bld, uint32_t u) -{ - int i; - unsigned n = bld->num_immds; - - for (i = 0; i < n; ++i) - if (bld->saved_immd[i]->reg.imm.u32 == u) - return bld->saved_immd[i]; - - assert(n < BLD_MAX_IMMDS); - bld->num_immds++; - - bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4); - bld->saved_immd[n]->reg.imm.u32 = u; - return bld->saved_immd[n]; -} - -static void -bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, - struct nv_value *); - -/* Replace the source of the phi in the loop header by the last assignment, - * or eliminate the phi function if there is no assignment inside the loop. - * - * Redundancy situation 1 - (used) but (not redefined) value: - * %3 = phi %0, %3 = %3 is used - * %3 = phi %0, %4 = is new definition - * - * Redundancy situation 2 - (not used) but (redefined) value: - * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE - */ -static void -bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) -{ - struct nv_basic_block *save = bld->pc->current_block; - struct nv_instruction *phi, *next; - struct nv_value *val; - struct bld_register *reg; - int i, s, n; - - for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { - next = phi->next; - - reg = (struct bld_register *)phi->target; - phi->target = NULL; - - /* start with s == 1, src[0] is from outside the loop */ - for (s = 1, n = 0; n < bb->num_in; ++n) { - if (bb->in_kind[n] != CFG_EDGE_BACK) - continue; - - assert(s < 4); - bld->pc->current_block = bb->in[n]; - val = bld_fetch_global(bld, reg); - - for (i = 0; i < 4; ++i) - if (phi->src[i] && phi->src[i]->value == val) - break; - if (i == 4) { - /* skip values we do not want to replace */ - for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); - nv_reference(bld->pc, phi, s++, val); - } - } - bld->pc->current_block = save; - - if (phi->src[0]->value == phi->def[0] || - phi->src[0]->value == phi->src[1]->value) - s = 1; - else - if (phi->src[1]->value == phi->def[0]) - s = 0; - else - continue; - - if (s >= 0) { - /* eliminate the phi */ - bld_register_del_val(reg, phi->def[0]); - - ++bld->pc->pass_seq; - bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); - - nvc0_insn_delete(phi); - } - } -} - -static INLINE struct nv_value * -bld_imm_f32(struct bld_context *bld, float f) -{ - return bld_imm_u32(bld, fui(f)); -} - -static struct nv_value * -bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0) -{ - struct nv_instruction *insn = new_instruction(bld->pc, opcode); - - nv_reference(bld->pc, insn, 0, src0); - - return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); -} - -static struct nv_value * -bld_insn_2(struct bld_context *bld, uint opcode, - struct nv_value *src0, struct nv_value *src1) -{ - struct nv_instruction *insn = new_instruction(bld->pc, opcode); - - nv_reference(bld->pc, insn, 0, src0); - nv_reference(bld->pc, insn, 1, src1); - - return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); -} - -static struct nv_value * -bld_insn_3(struct bld_context *bld, uint opcode, - struct nv_value *src0, struct nv_value *src1, - struct nv_value *src2) -{ - struct nv_instruction *insn = new_instruction(bld->pc, opcode); - - nv_reference(bld->pc, insn, 0, src0); - nv_reference(bld->pc, insn, 1, src1); - nv_reference(bld->pc, insn, 2, src2); - - return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size)); -} - -static INLINE void -bld_src_predicate(struct bld_context *bld, - struct nv_instruction *nvi, int s, struct nv_value *val) -{ - nvi->predicate = s; - nv_reference(bld->pc, nvi, s, val); -} - -static INLINE void -bld_src_pointer(struct bld_context *bld, - struct nv_instruction *nvi, int s, struct nv_value *val) -{ - nvi->indirect = s; - nv_reference(bld->pc, nvi, s, val); -} - -static void -bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, - struct nv_value *val) -{ - struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST); - struct nv_value *loc; - - loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); - - loc->reg.address = ofst * 4; - - nv_reference(bld->pc, insn, 0, loc); - nv_reference(bld->pc, insn, 1, val); - if (ptr) - bld_src_pointer(bld, insn, 2, ptr); -} - -static struct nv_value * -bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) -{ - struct nv_value *loc, *val; - - loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); - - loc->reg.address = ofst * 4; - - val = bld_insn_1(bld, NV_OP_LD, loc); - if (ptr) - bld_src_pointer(bld, val->insn, 1, ptr); - - return val; -} - -static struct nv_value * -bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) -{ - struct nv_value *val; - - val = bld_insn_1(bld, NV_OP_LG2, x); - val = bld_insn_2(bld, NV_OP_MUL_F32, e, val); - - val = bld_insn_1(bld, NV_OP_PREEX2, val); - val = bld_insn_1(bld, NV_OP_EX2, val); - - return val; -} - -static INLINE struct nv_value * -bld_load_imm_f32(struct bld_context *bld, float f) -{ - if (f == 0.0f) - return bld->zero; - return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); -} - -static INLINE struct nv_value * -bld_load_imm_u32(struct bld_context *bld, uint32_t u) -{ - if (u == 0) - return bld->zero; - return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u)); -} - -static INLINE struct nv_value * -bld_setp(struct bld_context *bld, uint op, uint8_t cc, - struct nv_value *src0, struct nv_value *src1) -{ - struct nv_value *val = bld_insn_2(bld, op, src0, src1); - - val->reg.file = NV_FILE_PRED; - val->reg.size = 1; - val->insn->set_cond = cc & 0xf; - return val; -} - -static INLINE struct nv_value * -bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src) -{ - struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src); - val->insn->ext.cvt.d = dt; - val->insn->ext.cvt.s = st; - return val; -} - -static void -bld_kil(struct bld_context *bld, struct nv_value *src) -{ - struct nv_instruction *nvi; - - src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero); - - nvi = new_instruction(bld->pc, NV_OP_KIL); - nvi->fixed = 1; - - bld_src_predicate(bld, nvi, 0, src); -} - -static void -bld_flow(struct bld_context *bld, uint opcode, - struct nv_value *pred, uint8_t cc, struct nv_basic_block *target, - boolean reconverge) -{ - struct nv_instruction *nvi; - - if (reconverge) - new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1; - - nvi = new_instruction(bld->pc, opcode); - nvi->target = target; - nvi->terminator = 1; - if (pred) { - nvi->cc = cc; - bld_src_predicate(bld, nvi, 0, pred); - } -} - -static ubyte -translate_setcc(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_SLT: return NV_CC_LT; - case TGSI_OPCODE_SGE: return NV_CC_GE; - case TGSI_OPCODE_SEQ: return NV_CC_EQ; - case TGSI_OPCODE_SGT: return NV_CC_GT; - case TGSI_OPCODE_SLE: return NV_CC_LE; - case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U; - case TGSI_OPCODE_STR: return NV_CC_TR; - case TGSI_OPCODE_SFL: return NV_CC_FL; - - case TGSI_OPCODE_ISLT: return NV_CC_LT; - case TGSI_OPCODE_ISGE: return NV_CC_GE; - case TGSI_OPCODE_USEQ: return NV_CC_EQ; - case TGSI_OPCODE_USGE: return NV_CC_GE; - case TGSI_OPCODE_USLT: return NV_CC_LT; - case TGSI_OPCODE_USNE: return NV_CC_NE; - default: - assert(0); - return NV_CC_FL; - } -} - -static uint -translate_opcode(uint opcode) -{ - switch (opcode) { - case TGSI_OPCODE_ABS: return NV_OP_ABS_F32; - case TGSI_OPCODE_ADD: return NV_OP_ADD_F32; - case TGSI_OPCODE_SUB: return NV_OP_SUB_F32; - case TGSI_OPCODE_UADD: return NV_OP_ADD_B32; - case TGSI_OPCODE_AND: return NV_OP_AND; - case TGSI_OPCODE_EX2: return NV_OP_EX2; - case TGSI_OPCODE_CEIL: return NV_OP_CEIL; - case TGSI_OPCODE_FLR: return NV_OP_FLOOR; - case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC; - case TGSI_OPCODE_COS: return NV_OP_COS; - case TGSI_OPCODE_SIN: return NV_OP_SIN; - case TGSI_OPCODE_DDX: return NV_OP_DFDX; - case TGSI_OPCODE_DDY: return NV_OP_DFDY; - case TGSI_OPCODE_F2I: - case TGSI_OPCODE_F2U: - case TGSI_OPCODE_I2F: - case TGSI_OPCODE_U2F: return NV_OP_CVT; - case TGSI_OPCODE_INEG: return NV_OP_NEG_S32; - case TGSI_OPCODE_LG2: return NV_OP_LG2; - case TGSI_OPCODE_ISHR: return NV_OP_SAR; - case TGSI_OPCODE_USHR: return NV_OP_SHR; - case TGSI_OPCODE_MAD: return NV_OP_MAD_F32; - case TGSI_OPCODE_MAX: return NV_OP_MAX_F32; - case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32; - case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32; - case TGSI_OPCODE_MIN: return NV_OP_MIN_F32; - case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32; - case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32; - case TGSI_OPCODE_MUL: return NV_OP_MUL_F32; - case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32; - case TGSI_OPCODE_OR: return NV_OP_OR; - case TGSI_OPCODE_RCP: return NV_OP_RCP; - case TGSI_OPCODE_RSQ: return NV_OP_RSQ; - case TGSI_OPCODE_SAD: return NV_OP_SAD; - case TGSI_OPCODE_SHL: return NV_OP_SHL; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: return NV_OP_FSET_F32; - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_ISGE: return NV_OP_SET_S32; - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: return NV_OP_SET_U32; - case TGSI_OPCODE_TEX: return NV_OP_TEX; - case TGSI_OPCODE_TXP: return NV_OP_TEX; - case TGSI_OPCODE_TXB: return NV_OP_TXB; - case TGSI_OPCODE_TXL: return NV_OP_TXL; - case TGSI_OPCODE_XOR: return NV_OP_XOR; - default: - return NV_OP_NOP; - } -} - -#if 0 -static ubyte -infer_src_type(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_AND: - case TGSI_OPCODE_OR: - case TGSI_OPCODE_XOR: - case TGSI_OPCODE_SAD: - case TGSI_OPCODE_U2F: - case TGSI_OPCODE_UADD: - case TGSI_OPCODE_UDIV: - case TGSI_OPCODE_UMOD: - case TGSI_OPCODE_UMAD: - case TGSI_OPCODE_UMUL: - case TGSI_OPCODE_UMAX: - case TGSI_OPCODE_UMIN: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - case TGSI_OPCODE_USHR: - return NV_TYPE_U32; - case TGSI_OPCODE_I2F: - case TGSI_OPCODE_IDIV: - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_INEG: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_ISHR: - case TGSI_OPCODE_ISLT: - return NV_TYPE_S32; - default: - return NV_TYPE_F32; - } -} - -static ubyte -infer_dst_type(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_F2U: - case TGSI_OPCODE_AND: - case TGSI_OPCODE_OR: - case TGSI_OPCODE_XOR: - case TGSI_OPCODE_SAD: - case TGSI_OPCODE_UADD: - case TGSI_OPCODE_UDIV: - case TGSI_OPCODE_UMOD: - case TGSI_OPCODE_UMAD: - case TGSI_OPCODE_UMUL: - case TGSI_OPCODE_UMAX: - case TGSI_OPCODE_UMIN: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - case TGSI_OPCODE_USHR: - return NV_TYPE_U32; - case TGSI_OPCODE_F2I: - case TGSI_OPCODE_IDIV: - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_INEG: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_ISHR: - case TGSI_OPCODE_ISLT: - return NV_TYPE_S32; - default: - return NV_TYPE_F32; - } -} -#endif - -static void -emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, - unsigned chan, struct nv_value *res) -{ - const struct tgsi_full_dst_register *reg = &inst->Dst[0]; - struct nv_instruction *nvi; - struct nv_value *mem; - struct nv_value *ptr = NULL; - int idx; - - idx = reg->Register.Index; - assert(chan < 4); - - if (reg->Register.Indirect) - ptr = FETCH_ADDR(reg->Indirect.Index, - tgsi_util_get_src_register_swizzle(®->Indirect, 0)); - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - res = bld_insn_1(bld, NV_OP_SAT, res); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f)); - res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f)); - break; - } - - switch (reg->Register.File) { - case TGSI_FILE_OUTPUT: - if (!res->insn) - res = bld_insn_1(bld, NV_OP_MOV, res); - - if (bld->pc->is_fragprog) { - assert(!ptr); - STORE_OUTP(idx, chan, res); - } else { - nvi = new_instruction(bld->pc, NV_OP_EXPORT); - mem = new_value(bld->pc, bld->ti->output_file, res->reg.size); - nv_reference(bld->pc, nvi, 0, mem); - nv_reference(bld->pc, nvi, 1, res); - if (!ptr) - mem->reg.address = bld->ti->output_loc[idx][chan]; - else - mem->reg.address = 0x80 + idx * 16 + chan * 4; - nvi->fixed = 1; - } - break; - case TGSI_FILE_TEMPORARY: - assert(idx < BLD_MAX_TEMPS); - if (!res->insn || res->insn->bb != bld->pc->current_block) - res = bld_insn_1(bld, NV_OP_MOV, res); - - assert(res->reg.file == NV_FILE_GPR); - - if (bld->ti->require_stores) - bld_lmem_store(bld, ptr, idx * 4 + chan, res); - else - STORE_TEMP(idx, chan, res); - break; - case TGSI_FILE_ADDRESS: - assert(idx < BLD_MAX_ADDRS); - STORE_ADDR(idx, chan, res); - break; - } -} - -static INLINE uint32_t -bld_is_output_written(struct bld_context *bld, int i, int c) -{ - if (c < 0) - return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32)); - return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); -} - -static void -bld_append_vp_ucp(struct bld_context *bld) -{ - struct nv_value *res[6]; - struct nv_value *ucp, *vtx, *out; - struct nv_instruction *insn; - int i, c; - - assert(bld->ti->prog->vp.num_ucps <= 6); - - for (c = 0; c < 4; ++c) { - vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]); - - for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { - ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4); - ucp->reg.address = i * 16 + c * 4; - - if (c == 0) - res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp); - else - res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]); - } - } - - for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { - (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4; - (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; - nv_reference(bld->pc, insn, 0, out); - nv_reference(bld->pc, insn, 1, res[i]); - } -} - -static void -bld_export_fp_outputs(struct bld_context *bld) -{ - struct nv_value *vals[4]; - struct nv_instruction *nvi; - int i, c, n; - - for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { - if (!bld_is_output_written(bld, i, -1)) - continue; - for (n = 0, c = 0; c < 4; ++c) { - if (!bld_is_output_written(bld, i, c)) - continue; - vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]); - assert(vals[n]); - vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]); - vals[n++]->reg.id = bld->ti->output_loc[i][c]; - } - assert(n); - - (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; - for (c = 0; c < n; ++c) - nv_reference(bld->pc, nvi, c, vals[c]); - } -} - -static void -bld_new_block(struct bld_context *bld, struct nv_basic_block *b) -{ - int i, c; - - bld->pc->current_block = b; - - for (i = 0; i < 4; ++i) - bld->saved_addr[i][0] = NULL; - for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) - for (c = 0; c < 4; ++c) - bld->saved_inputs[i][c] = NULL; - - bld->out_kind = CFG_EDGE_FORWARD; -} - -static struct nv_value * -bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) -{ - unsigned cent = mode & NVC0_INTERP_CENTROID; - - mode &= ~NVC0_INTERP_CENTROID; - - if (val->reg.address == 0x3fc) { - /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */ - val = bld_insn_1(bld, NV_OP_LINTERP, val); - val->insn->flat = 1; - val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); - val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); - return val; - } else - if (mode == NVC0_INTERP_PERSPECTIVE) { - val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]); - } else { - val = bld_insn_1(bld, NV_OP_LINTERP, val); - } - - val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0; - val->insn->centroid = cent ? 1 : 0; - return val; -} - -static struct nv_value * -emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, - const unsigned s, const unsigned chan) -{ - const struct tgsi_full_src_register *src = &insn->Src[s]; - struct nv_value *res = NULL; - struct nv_value *ptr = NULL; - int idx, ind_idx, dim_idx; - unsigned swz, ind_swz, sgn; - - idx = src->Register.Index; - swz = tgsi_util_get_full_src_register_swizzle(src, chan); - - if (src->Register.Indirect) { - ind_idx = src->Indirect.Index; - ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0); - - ptr = FETCH_ADDR(ind_idx, ind_swz); - } - - if (src->Register.Dimension) - dim_idx = src->Dimension.Index; - else - dim_idx = 0; - - switch (src->Register.File) { - case TGSI_FILE_CONSTANT: - assert(dim_idx < 14); - res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4); - res->reg.address = idx * 16 + swz * 4; - res = bld_insn_1(bld, NV_OP_LD, res); - if (ptr) - bld_src_pointer(bld, res->insn, 1, ptr); - break; - case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */ - assert(idx < bld->ti->immd32_nr); - res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]); - break; - case TGSI_FILE_INPUT: - assert(!src->Register.Dimension); - if (!ptr) { - res = bld->saved_inputs[idx][swz]; - if (res) - break; - } - res = new_value(bld->pc, bld->ti->input_file, 4); - if (ptr) - res->reg.address = 0x80 + idx * 16 + swz * 4; - else - res->reg.address = bld->ti->input_loc[idx][swz]; - - if (bld->pc->is_fragprog) - res = bld_interp(bld, bld->ti->interp_mode[idx], res); - else - res = bld_insn_1(bld, NV_OP_VFETCH, res); - - if (ptr) - bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr); - else - bld->saved_inputs[idx][swz] = res; - break; - case TGSI_FILE_TEMPORARY: - if (bld->ti->require_stores) - res = bld_lmem_load(bld, ptr, idx * 4 + swz); - else - res = bld_fetch_global(bld, &bld->tvs[idx][swz]); - break; - case TGSI_FILE_ADDRESS: - res = bld_fetch_global(bld, &bld->avs[idx][swz]); - break; - case TGSI_FILE_PREDICATE: - res = bld_fetch_global(bld, &bld->pvs[idx][swz]); - break; - case TGSI_FILE_SYSTEM_VALUE: - assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */ - res = new_value(bld->pc, - bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4); - res->reg.address = bld->ti->sysval_loc[idx]; - - if (res->reg.file == NV_FILE_MEM_A) - res = bld_insn_1(bld, NV_OP_VFETCH, res); - else - res = bld_interp(bld, NVC0_INTERP_FLAT, res); - - /* mesa doesn't do real integers yet :-(and in GL this should be S32) */ - res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res); - break; - default: - NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); - abort(); - break; - } - if (!res) - return bld_undef(bld, NV_FILE_GPR); - - sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); - - switch (sgn) { - case TGSI_UTIL_SIGN_KEEP: - break; - case TGSI_UTIL_SIGN_CLEAR: - res = bld_insn_1(bld, NV_OP_ABS_F32, res); - break; - case TGSI_UTIL_SIGN_TOGGLE: - res = bld_insn_1(bld, NV_OP_NEG_F32, res); - break; - case TGSI_UTIL_SIGN_SET: - res = bld_insn_1(bld, NV_OP_ABS_F32, res); - res = bld_insn_1(bld, NV_OP_NEG_F32, res); - break; - default: - NOUVEAU_ERR("illegal/unhandled src reg sign mode\n"); - abort(); - break; - } - - return res; -} - -static void -bld_lit(struct bld_context *bld, struct nv_value *dst0[4], - const struct tgsi_full_instruction *insn) -{ - struct nv_value *val0 = NULL; - unsigned mask = insn->Dst[0].Register.WriteMask; - - if (mask & ((1 << 0) | (1 << 3))) - dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f); - - if (mask & (3 << 1)) { - val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero); - if (mask & (1 << 1)) - dst0[1] = val0; - } - - if (mask & (1 << 2)) { - struct nv_value *val1, *val3, *src1, *src3, *pred; - struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f); - struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f); - - src1 = emit_fetch(bld, insn, 0, 1); - src3 = emit_fetch(bld, insn, 0, 3); - - pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero); - - val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero); - val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128); - val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128); - val3 = bld_pow(bld, val1, val3); - - dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero); - bld_src_predicate(bld, dst0[2]->insn, 1, pred); - - dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]); - } -} - -static INLINE void -describe_texture_target(unsigned target, int *dim, - int *array, int *cube, int *shadow) -{ - *dim = *array = *cube = *shadow = 0; - - switch (target) { - case TGSI_TEXTURE_1D: - *dim = 1; - break; - case TGSI_TEXTURE_SHADOW1D: - *dim = *shadow = 1; - break; - case TGSI_TEXTURE_UNKNOWN: - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - *dim = 2; - break; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - *dim = 2; - *shadow = 1; - break; - case TGSI_TEXTURE_3D: - *dim = 3; - break; - case TGSI_TEXTURE_CUBE: - *dim = 2; - *cube = 1; - break; - case TGSI_TEXTURE_1D_ARRAY: - *dim = *array = 1; - break; - case TGSI_TEXTURE_2D_ARRAY: - *dim = 2; - *array = 1; - break; - /* - case TGSI_TEXTURE_SHADOW1D_ARRAY: - *dim = *array = *shadow = 1; - break; - case TGSI_TEXTURE_SHADOW2D_ARRAY: - *dim = 2; - *array = *shadow = 1; - break; - case TGSI_TEXTURE_CUBE_ARRAY: - *dim = 2; - *cube = *array = 1; - break; - */ - default: - assert(0); - break; - } -} - -static struct nv_value * -bld_clone(struct bld_context *bld, struct nv_instruction *nvi) -{ - struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode); - struct nv_instruction *next, *prev; - int c; - - next = dupi->next; - prev = dupi->prev; - - *dupi = *nvi; - - dupi->next = next; - dupi->prev = prev; - - for (c = 0; c < 5 && nvi->def[c]; ++c) - bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c])); - - for (c = 0; c < 6 && nvi->src[c]; ++c) { - dupi->src[c] = NULL; - nv_reference(bld->pc, dupi, c, nvi->src[c]->value); - } - - return dupi->def[0]; -} - -/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */ -static void -load_proj_tex_coords(struct bld_context *bld, - struct nv_value *t[4], int dim, int shadow, - const struct tgsi_full_instruction *insn) -{ - int c; - unsigned mask = (1 << dim) - 1; - - if (shadow) - mask |= 4; /* depth comparison value */ - - t[3] = emit_fetch(bld, insn, 0, 3); - if (t[3]->insn->opcode == NV_OP_PINTERP) { - t[3] = bld_clone(bld, t[3]->insn); - t[3]->insn->opcode = NV_OP_LINTERP; - nv_reference(bld->pc, t[3]->insn, 1, NULL); - } - t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); - - for (c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) - continue; - t[c] = emit_fetch(bld, insn, 0, c); - - if (t[c]->insn->opcode != NV_OP_PINTERP) - continue; - mask &= ~(1 << c); - - t[c] = bld_clone(bld, t[c]->insn); - nv_reference(bld->pc, t[c]->insn, 1, t[3]); - } - if (mask == 0) - return; - - t[3] = emit_fetch(bld, insn, 0, 3); - t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); - - for (c = 0; c < 4; ++c) - if (mask & (1 << c)) - t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]); -} - -/* For a quad of threads / top left, top right, bottom left, bottom right - * pixels, do a different operation, and take src0 from a specific thread. - */ -#define QOP_ADD 0 -#define QOP_SUBR 1 -#define QOP_SUB 2 -#define QOP_MOV1 3 - -#define QOP(a, b, c, d) \ - ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6)) - -static INLINE struct nv_value * -bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, - struct nv_value *src1, boolean wp) -{ - struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1); - val->insn->lanes = lane; - val->insn->quadop = qop; - if (wp) { - assert(!"quadop predicate write"); - } - return val; -} - -/* order of TGSI operands: x y z layer shadow lod/bias */ -/* order of native operands: layer x y z | lod/bias shadow */ -static struct nv_instruction * -emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, - struct nv_value *dst[4], struct nv_value *arg[4], - int dim, int array, int cube, int shadow) -{ - struct nv_value *src[4]; - struct nv_instruction *nvi, *bnd; - int c; - int s = 0; - boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; - - if (array) - arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ - - bnd = new_instruction(bld->pc, NV_OP_BIND); - if (array) { - src[s] = new_value(bld->pc, NV_FILE_GPR, 4); - bld_def(bnd, s, src[s]); - nv_reference(bld->pc, bnd, s++, arg[dim + cube]); - } - for (c = 0; c < dim + cube; ++c, ++s) { - src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4)); - nv_reference(bld->pc, bnd, s, arg[c]); - } - - if (shadow || lodbias) { - bnd = new_instruction(bld->pc, NV_OP_BIND); - - if (lodbias) { - src[s] = new_value(bld->pc, NV_FILE_GPR, 4); - bld_def(bnd, 0, src[s++]); - nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]); - } - if (shadow) { - src[s] = new_value(bld->pc, NV_FILE_GPR, 4); - bld_def(bnd, lodbias, src[s++]); - nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]); - } - } - - nvi = new_instruction(bld->pc, opcode); - for (c = 0; c < 4; ++c) - dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4)); - for (c = 0; c < s; ++c) - nv_reference(bld->pc, nvi, c, src[c]); - - nvi->ext.tex.t = tic; - nvi->ext.tex.s = tsc; - nvi->tex_mask = 0xf; - nvi->tex_cube = cube; - nvi->tex_dim = dim; - nvi->tex_cube = cube; - nvi->tex_shadow = shadow; - nvi->tex_array = array; - nvi->tex_live = 0; - - return nvi; -} - -static void -bld_tex(struct bld_context *bld, struct nv_value *dst0[4], - const struct tgsi_full_instruction *insn) -{ - struct nv_value *t[4], *s[3]; - uint opcode = translate_opcode(insn->Instruction.Opcode); - int c, dim, array, cube, shadow; - const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL; - const int tic = insn->Src[1].Register.Index; - const int tsc = tic; - - describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow); - - assert(dim + array + shadow + lodbias <= 5); - - if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP) - load_proj_tex_coords(bld, t, dim, shadow, insn); - else { - for (c = 0; c < dim + cube + array; ++c) - t[c] = emit_fetch(bld, insn, 0, c); - if (shadow) - t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2)); - } - - if (cube) { - for (c = 0; c < 3; ++c) - s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]); - - s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]); - s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]); - s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]); - - for (c = 0; c < 3; ++c) - t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]); - } - - if (lodbias) - t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3); - - emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow); -} - -static INLINE struct nv_value * -bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn, - int n) -{ - struct nv_value *dotp, *src0, *src1; - int c; - - src0 = emit_fetch(bld, insn, 0, 0); - src1 = emit_fetch(bld, insn, 1, 0); - dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); - - for (c = 1; c < n; ++c) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp); - } - return dotp; -} - -#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \ - for (chan = 0; chan < 4; ++chan) \ - if ((inst)->Dst[0].Register.WriteMask & (1 << chan)) - -static void -bld_instruction(struct bld_context *bld, - const struct tgsi_full_instruction *insn) -{ - struct nv_value *src0; - struct nv_value *src1; - struct nv_value *src2; - struct nv_value *dst0[4] = { NULL }; - struct nv_value *temp; - int c; - uint opcode = translate_opcode(insn->Instruction.Opcode); - uint8_t mask = insn->Dst[0].Register.WriteMask; - -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); -#endif - - switch (insn->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MUL: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dst0[c] = bld_insn_2(bld, opcode, src0, src1); - } - break; - case TGSI_OPCODE_ARL: - src1 = bld_imm_u32(bld, 4); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src0 = bld_insn_1(bld, NV_OP_FLOOR, src0); - src0->insn->ext.cvt.d = NV_TYPE_S32; - src0->insn->ext.cvt.s = NV_TYPE_F32; - dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1); - } - break; - case TGSI_OPCODE_CMP: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0); - dst0[c]->insn->set_cond = NV_CC_LT; - } - break; - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_PRESIN, src0); - if (insn->Dst[0].Register.WriteMask & 7) - temp = bld_insn_1(bld, opcode, temp); - for (c = 0; c < 3; ++c) - if (insn->Dst[0].Register.WriteMask & (1 << c)) - dst0[c] = temp; - if (!(insn->Dst[0].Register.WriteMask & (1 << 3))) - break; - src0 = emit_fetch(bld, insn, 0, 3); - temp = bld_insn_1(bld, NV_OP_PRESIN, src0); - dst0[3] = bld_insn_1(bld, opcode, temp); - break; - case TGSI_OPCODE_DP2: - temp = bld_dot(bld, insn, 2); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DP3: - temp = bld_dot(bld, insn, 3); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DP4: - temp = bld_dot(bld, insn, 4); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DPH: - src0 = bld_dot(bld, insn, 3); - src1 = emit_fetch(bld, insn, 1, 3); - temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DST: - if (insn->Dst[0].Register.WriteMask & 1) - dst0[0] = bld_imm_f32(bld, 1.0f); - if (insn->Dst[0].Register.WriteMask & 2) { - src0 = emit_fetch(bld, insn, 0, 1); - src1 = emit_fetch(bld, insn, 1, 1); - dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); - } - if (insn->Dst[0].Register.WriteMask & 4) - dst0[2] = emit_fetch(bld, insn, 0, 2); - if (insn->Dst[0].Register.WriteMask & 8) - dst0[3] = emit_fetch(bld, insn, 1, 3); - break; - case TGSI_OPCODE_EXP: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_FLOOR, src0); - - if (insn->Dst[0].Register.WriteMask & 2) - dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp); - if (insn->Dst[0].Register.WriteMask & 1) { - temp = bld_insn_1(bld, NV_OP_PREEX2, temp); - dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); - } - if (insn->Dst[0].Register.WriteMask & 4) { - temp = bld_insn_1(bld, NV_OP_PREEX2, src0); - dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); - } - if (insn->Dst[0].Register.WriteMask & 8) - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - case TGSI_OPCODE_EX2: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_PREEX2, src0); - temp = bld_insn_1(bld, NV_OP_EX2, temp); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_FRC: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0); - dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]); - } - break; - case TGSI_OPCODE_KIL: - for (c = 0; c < 4; ++c) - bld_kil(bld, emit_fetch(bld, insn, 0, c)); - break; - case TGSI_OPCODE_KILP: - (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1; - break; - case TGSI_OPCODE_IF: - { - struct nv_basic_block *b = new_basic_block(bld->pc); - struct nv_value *pred = emit_fetch(bld, insn, 0, 0); - - assert(bld->cond_lvl < BLD_MAX_COND_NESTING); - - nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD); - - bld->join_bb[bld->cond_lvl] = bld->pc->current_block; - bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - - if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { - pred = bld_clone(bld, pred->insn); - pred->reg.size = 1; - pred->reg.file = NV_FILE_PRED; - if (pred->insn->opcode == NV_OP_FSET_F32) - pred->insn->opcode = NV_OP_SET_F32; - } else { - pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, - pred, bld->zero); - } - assert(!mask); - - bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0)); - - ++bld->cond_lvl; - bld_new_block(bld, b); - } - break; - case TGSI_OPCODE_ELSE: - { - struct nv_basic_block *b = new_basic_block(bld->pc); - - --bld->cond_lvl; - nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); - - bld->cond_bb[bld->cond_lvl]->exit->target = b; - bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - - new_instruction(bld->pc, NV_OP_BRA)->terminator = 1; - - ++bld->cond_lvl; - bld_new_block(bld, b); - } - break; - case TGSI_OPCODE_ENDIF: - { - struct nv_basic_block *b = new_basic_block(bld->pc); - - if (!nvc0_bblock_is_terminated(bld->pc->current_block)) - bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); - - --bld->cond_lvl; - nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); - nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); - - bld->cond_bb[bld->cond_lvl]->exit->target = b; - - bld_new_block(bld, b); - - if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) { - bld->join_bb[bld->cond_lvl]->exit->prev->target = b; - new_instruction(bld->pc, NV_OP_JOIN)->join = 1; - } - } - break; - case TGSI_OPCODE_BGNLOOP: - { - struct nv_basic_block *bl = new_basic_block(bld->pc); - struct nv_basic_block *bb = new_basic_block(bld->pc); - - assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); - - bld->loop_bb[bld->loop_lvl] = bl; - bld->brkt_bb[bld->loop_lvl] = bb; - - nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); - - bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); - - if (bld->loop_lvl == bld->pc->loop_nesting_bound) - bld->pc->loop_nesting_bound++; - - bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); - bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); - bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); - } - break; - case TGSI_OPCODE_BRK: - { - struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; - - bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); - - if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); - - bld->out_kind = CFG_EDGE_FAKE; - } - break; - case TGSI_OPCODE_CONT: - { - struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - - bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); - - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); - - if ((bb = bld->join_bb[bld->cond_lvl - 1])) { - bld->join_bb[bld->cond_lvl - 1] = NULL; - nvc0_insn_delete(bb->exit->prev); - } - bld->out_kind = CFG_EDGE_FAKE; - } - break; - case TGSI_OPCODE_ENDLOOP: - { - struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - - if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ - bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); - - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); - } - - bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ - - bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]); - } - break; - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_CEIL: - case TGSI_OPCODE_FLR: - case TGSI_OPCODE_TRUNC: - case TGSI_OPCODE_DDX: - case TGSI_OPCODE_DDY: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - dst0[c] = bld_insn_1(bld, opcode, src0); - } - break; - case TGSI_OPCODE_LIT: - bld_lit(bld, dst0, insn); - break; - case TGSI_OPCODE_LRP: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); - dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2); - } - break; - case TGSI_OPCODE_MOV: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = emit_fetch(bld, insn, 0, c); - break; - case TGSI_OPCODE_MAD: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2); - } - break; - case TGSI_OPCODE_POW: - src0 = emit_fetch(bld, insn, 0, 0); - src1 = emit_fetch(bld, insn, 1, 0); - temp = bld_pow(bld, src0, src1); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_LOG: - src0 = emit_fetch(bld, insn, 0, 0); - src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0); - temp = bld_insn_1(bld, NV_OP_LG2, src0); - dst0[2] = temp; - if (insn->Dst[0].Register.WriteMask & 3) { - temp = bld_insn_1(bld, NV_OP_FLOOR, temp); - dst0[0] = temp; - } - if (insn->Dst[0].Register.WriteMask & 2) { - temp = bld_insn_1(bld, NV_OP_PREEX2, temp); - temp = bld_insn_1(bld, NV_OP_EX2, temp); - temp = bld_insn_1(bld, NV_OP_RCP, temp); - dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp); - } - if (insn->Dst[0].Register.WriteMask & 8) - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_LG2: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, opcode, src0); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_RSQ: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_ABS_F32, src0); - temp = bld_insn_1(bld, NV_OP_RSQ, temp); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dst0[c] = bld_insn_2(bld, opcode, src0, src1); - dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode); - } - break; - case TGSI_OPCODE_SCS: - if (insn->Dst[0].Register.WriteMask & 0x3) { - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_PRESIN, src0); - if (insn->Dst[0].Register.WriteMask & 0x1) - dst0[0] = bld_insn_1(bld, NV_OP_COS, temp); - if (insn->Dst[0].Register.WriteMask & 0x2) - dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp); - } - if (insn->Dst[0].Register.WriteMask & 0x4) - dst0[2] = bld_imm_f32(bld, 0.0f); - if (insn->Dst[0].Register.WriteMask & 0x8) - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - case TGSI_OPCODE_SSG: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ - src0 = emit_fetch(bld, insn, 0, c); - src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); - src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); - src1->insn->set_cond = NV_CC_GT; - src2->insn->set_cond = NV_CC_LT; - dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); - } - break; - case TGSI_OPCODE_SUB: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1); - } - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXP: - bld_tex(bld, dst0, insn); - break; - case TGSI_OPCODE_XPD: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - if (c == 3) { - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - } - src0 = emit_fetch(bld, insn, 1, (c + 1) % 3); - src1 = emit_fetch(bld, insn, 0, (c + 2) % 3); - dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1); - - src0 = emit_fetch(bld, insn, 0, (c + 1) % 3); - src1 = emit_fetch(bld, insn, 1, (c + 2) % 3); - dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]); - - dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; - } - break; - case TGSI_OPCODE_RET: - (new_instruction(bld->pc, NV_OP_RET))->fixed = 1; - break; - case TGSI_OPCODE_END: - /* VP outputs are exported in-place as scalars, optimization later */ - if (bld->pc->is_fragprog) - bld_export_fp_outputs(bld); - if (bld->ti->append_ucp) - bld_append_vp_ucp(bld); - return; - default: - NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); - abort(); - return; - } - - if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT && - !bld->pc->is_fragprog) { - struct nv_instruction *mi = NULL; - uint size; - - if (bld->ti->append_ucp) { - if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { - bld->hpos_index = insn->Dst[0].Register.Index; - for (c = 0; c < 4; ++c) - if (mask & (1 << c)) - STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]); - } - } - - for (c = 0; c < 4; ++c) - if (mask & (1 << c)) - if ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); - - c = 0; - if ((mask & 0x3) == 0x3) { - mask &= ~0x3; - size = 8; - mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn; - } - if ((mask & 0xc) == 0xc) { - mask &= ~0xc; - if (mi) { - size = 16; - nv_reference(bld->pc, mi, 2, dst0[2]); - nv_reference(bld->pc, mi, 3, dst0[3]); - } else { - c = 2; - size = 8; - mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn; - } - } else - if (mi && (mask & 0x4)) { - size = 12; - mask &= ~0x4; - nv_reference(bld->pc, mi, 2, dst0[2]); - } - - if (mi) { - struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT); - int s; - - nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4)); - nv_reference(bld->pc, ex, 1, mi->def[0]); - - for (s = 1; s < size / 4; ++s) { - bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4)); - nv_reference(bld->pc, ex, s + 1, mi->def[s]); - } - - ex->fixed = 1; - ex->src[0]->value->reg.size = size; - ex->src[0]->value->reg.address = - bld->ti->output_loc[insn->Dst[0].Register.Index][c]; - } - } - - for (c = 0; c < 4; ++c) - if (mask & (1 << c)) - emit_store(bld, insn, c, dst0[c]); -} - -static INLINE void -bld_free_registers(struct bld_register *base, int n) -{ - int i, c; - - for (i = 0; i < n; ++i) - for (c = 0; c < 4; ++c) - util_dynarray_fini(&base[i * 4 + c].vals); -} - -int -nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti) -{ - struct bld_context *bld = CALLOC_STRUCT(bld_context); - unsigned ip; - - pc->root[0] = pc->current_block = new_basic_block(pc); - - bld->pc = pc; - bld->ti = ti; - - pc->loop_nesting_bound = 1; - - bld->zero = new_value(pc, NV_FILE_GPR, 4); - bld->zero->reg.id = 63; - - if (pc->is_fragprog) { - struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4); - mem->reg.address = 0x7c; - - bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem); - bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]); - } - - for (ip = 0; ip < ti->num_insns; ++ip) - bld_instruction(bld, &ti->insns[ip]); - - bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS); - bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS); - bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS); - bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS); - - FREE(bld); - return 0; -} - -/* If a variable is assigned in a loop, replace all references to the value - * from outside the loop with a phi value. - */ -static void -bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *old_val, - struct nv_value *new_val) -{ - struct nv_instruction *nvi; - - for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) { - int s; - for (s = 0; s < 6 && nvi->src[s]; ++s) - if (nvi->src[s]->value == old_val) - nv_reference(pc, nvi, s, new_val); - } - - b->pass_seq = pc->pass_seq; - - if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq) - bld_replace_value(pc, b->out[0], old_val, new_val); - - if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq) - bld_replace_value(pc, b->out[1], old_val, new_val); -} |