diff options
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 814 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 502 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_emit.c | 1257 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 1161 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_print.c | 321 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_regalloc.c | 1096 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 2101 |
7 files changed, 0 insertions, 7252 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c deleted file mode 100644 index 9137f871f5a..00000000000 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ /dev/null @@ -1,814 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "nv50_pc.h" -#include "nv50_program.h" - -#include <stdio.h> - -/* returns TRUE if operands 0 and 1 can be swapped */ -boolean -nv_op_commutative(uint opcode) -{ - switch (opcode) { - case NV_OP_ADD: - case NV_OP_MUL: - case NV_OP_MAD: - case NV_OP_AND: - case NV_OP_OR: - case NV_OP_XOR: - case NV_OP_MIN: - case NV_OP_MAX: - case NV_OP_SAD: - return TRUE; - default: - return FALSE; - } -} - -/* return operand to which the address register applies */ -int -nv50_indirect_opnd(struct nv_instruction *i) -{ - if (!i->src[4]) - return -1; - - switch (i->opcode) { - case NV_OP_MOV: - case NV_OP_LDA: - case NV_OP_STA: - return 0; - default: - return 1; - } -} - -boolean -nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s) -{ - if (nvi->flags_src || nvi->flags_def) - return FALSE; - - switch (nvi->opcode) { - case NV_OP_ADD: - case NV_OP_MUL: - case NV_OP_AND: - case NV_OP_OR: - case NV_OP_XOR: - case NV_OP_SHL: - case NV_OP_SHR: - return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) && - (nvi->def[0]->reg.file == NV_FILE_GPR); - case NV_OP_MOV: - assert(s == 0); - return (nvi->def[0]->reg.file == NV_FILE_GPR); - default: - return FALSE; - } -} - -boolean -nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value) -{ - int i; - - for (i = 0; i < 3 && nvi->src[i]; ++i) - if (nvi->src[i]->value->reg.file == NV_FILE_IMM) - return FALSE; - - switch (nvi->opcode) { - case NV_OP_ABS: - case NV_OP_ADD: - case NV_OP_CEIL: - case NV_OP_FLOOR: - case NV_OP_TRUNC: - case NV_OP_CVT: - case NV_OP_ROUND: - case NV_OP_NEG: - case NV_OP_MAD: - case NV_OP_MUL: - case NV_OP_SAT: - case NV_OP_SUB: - case NV_OP_MAX: - case NV_OP_MIN: - if (s == 0 && (value->reg.file == NV_FILE_MEM_S || - value->reg.file == NV_FILE_MEM_P)) - return TRUE; - if (value->reg.file < NV_FILE_MEM_C(0) || - value->reg.file > NV_FILE_MEM_C(15)) - return FALSE; - return (s == 1) || - ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR)); - case NV_OP_MOV: - 
assert(s == 0); - return /* TRUE */ FALSE; /* don't turn MOVs into loads */ - default: - return FALSE; - } -} - -/* Return whether this instruction can be executed conditionally. */ -boolean -nv50_nvi_can_predicate(struct nv_instruction *nvi) -{ - int i; - - if (nvi->flags_src) - return FALSE; - for (i = 0; i < 4 && nvi->src[i]; ++i) - if (nvi->src[i]->value->reg.file == NV_FILE_IMM) - return FALSE; - return TRUE; -} - -ubyte -nv50_supported_src_mods(uint opcode, int s) -{ - switch (opcode) { - case NV_OP_ABS: - return NV_MOD_NEG | NV_MOD_ABS; /* obviously */ - case NV_OP_ADD: - case NV_OP_MUL: - case NV_OP_MAD: - return NV_MOD_NEG; - case NV_OP_DFDX: - case NV_OP_DFDY: - assert(s == 0); - return NV_MOD_NEG; - case NV_OP_MAX: - case NV_OP_MIN: - return NV_MOD_ABS; - case NV_OP_CVT: - case NV_OP_LG2: - case NV_OP_NEG: - case NV_OP_PREEX2: - case NV_OP_PRESIN: - case NV_OP_RCP: - case NV_OP_RSQ: - return NV_MOD_ABS | NV_MOD_NEG; - default: - return 0; - } -} - -/* We may want an opcode table. */ -boolean -nv50_op_can_write_flags(uint opcode) -{ - if (nv_is_vector_op(opcode)) - return FALSE; - switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */ - case NV_OP_PHI: - case NV_OP_MOV: - case NV_OP_SELECT: - case NV_OP_LINTERP: - case NV_OP_PINTERP: - case NV_OP_LDA: - return FALSE; - default: - break; - } - if (opcode >= NV_OP_RCP && opcode <= NV_OP_PREEX2) - return FALSE; - return TRUE; -} - -int -nv_nvi_refcount(struct nv_instruction *nvi) -{ - int i, rc; - - rc = nvi->flags_def ? 
nvi->flags_def->refc : 0; - - for (i = 0; i < 4; ++i) { - if (!nvi->def[i]) - return rc; - rc += nvi->def[i]->refc; - } - return rc; -} - -int -nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val, - struct nv_value *new_val) -{ - int i, n; - - if (old_val == new_val) - return old_val->refc; - - for (i = 0, n = 0; i < pc->num_refs; ++i) { - if (pc->refs[i]->value == old_val) { - ++n; - nv_reference(pc, &pc->refs[i], new_val); - } - } - return n; -} - -struct nv_value * -nvcg_find_constant(struct nv_ref *ref) -{ - struct nv_value *src; - - if (!ref) - return NULL; - - src = ref->value; - while (src->insn && src->insn->opcode == NV_OP_MOV) { - assert(!src->insn->src[0]->mod); - src = src->insn->src[0]->value; - } - if ((src->reg.file == NV_FILE_IMM) || - (src->insn && src->insn->opcode == NV_OP_LDA && - src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && - src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) - return src; - return NULL; -} - -struct nv_value * -nvcg_find_immediate(struct nv_ref *ref) -{ - struct nv_value *src = nvcg_find_constant(ref); - - return (src && src->reg.file == NV_FILE_IMM) ? 
src : NULL; -} - -static void -nv_pc_free_refs(struct nv_pc *pc) -{ - int i; - for (i = 0; i < pc->num_refs; i += 64) - FREE(pc->refs[i]); - FREE(pc->refs); -} - -static const char * -edge_name(ubyte type) -{ - switch (type) { - case CFG_EDGE_FORWARD: return "forward"; - case CFG_EDGE_BACK: return "back"; - case CFG_EDGE_LOOP_ENTER: return "loop"; - case CFG_EDGE_LOOP_LEAVE: return "break"; - case CFG_EDGE_FAKE: return "fake"; - default: - return "?"; - } -} - -void -nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) -{ - struct nv_basic_block *bb[64], *bbb[16], *b; - int j, p, pp; - - bb[0] = root; - p = 1; - pp = 0; - - while (p > 0) { - b = bb[--p]; - b->priv = 0; - - for (j = 1; j >= 0; --j) { - if (!b->out[j]) - continue; - - switch (b->out_kind[j]) { - case CFG_EDGE_BACK: - continue; - case CFG_EDGE_FORWARD: - case CFG_EDGE_FAKE: - if (++b->out[j]->priv == b->out[j]->num_in) - bb[p++] = b->out[j]; - break; - case CFG_EDGE_LOOP_ENTER: - bb[p++] = b->out[j]; - break; - case CFG_EDGE_LOOP_LEAVE: - if (!b->out[j]->priv) { - bbb[pp++] = b->out[j]; - b->out[j]->priv = 1; - } - break; - default: - assert(0); - break; - } - } - - f(priv, b); - - if (!p) { - p = pp; - for (; pp > 0; --pp) - bb[pp - 1] = bbb[pp - 1]; - } - } -} - -static void -nv_do_print_function(void *priv, struct nv_basic_block *b) -{ - struct nv_instruction *i; - - debug_printf("=== BB %i ", b->id); - if (b->out[0]) - debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); - if (b->out[1]) - debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); - debug_printf("===\n"); - - i = b->phi; - if (!i) - i = b->entry; - for (; i; i = i->next) - nv_print_instruction(i); -} - -void -nv_print_function(struct nv_basic_block *root) -{ - if (root->subroutine) - debug_printf("SUBROUTINE %i\n", root->subroutine); - else - debug_printf("MAIN\n"); - - nv_pc_pass_in_order(root, nv_do_print_function, root); -} - -void -nv_print_program(struct nv_pc *pc) -{ - 
int i; - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i]) - nv_print_function(pc->root[i]); -} - -#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW -static void -nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) -{ - int i; - - b->pass_seq = pc->pass_seq; - - fprintf(f, "\t%i [shape=box]\n", b->id); - - for (i = 0; i < 2; ++i) { - if (!b->out[i]) - continue; - switch (b->out_kind[i]) { - case CFG_EDGE_FORWARD: - fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); - break; - case CFG_EDGE_LOOP_ENTER: - fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); - break; - case CFG_EDGE_LOOP_LEAVE: - fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); - break; - case CFG_EDGE_BACK: - fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); - continue; - case CFG_EDGE_FAKE: - fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); - break; - default: - assert(0); - break; - } - if (b->out[i]->pass_seq < pc->pass_seq) - nv_do_print_cfgraph(pc, f, b->out[i]); - } -} - -/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. 
*/ -static void -nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) -{ - FILE *f; - - f = fopen(filepath, "a"); - if (!f) - return; - - fprintf(f, "digraph G {\n"); - - ++pc->pass_seq; - - nv_do_print_cfgraph(pc, f, pc->root[subr]); - - fprintf(f, "}\n"); - - fclose(f); -} -#endif /* NV50_DEBUG_PROG_CFLOW */ - -static INLINE void -nvcg_show_bincode(struct nv_pc *pc) -{ - unsigned i; - - for (i = 0; i < pc->bin_size / 4; ++i) { - debug_printf("0x%08x ", pc->emit[i]); - if ((i % 16) == 15) - debug_printf("\n"); - } - debug_printf("\n"); -} - -static int -nv50_emit_program(struct nv_pc *pc) -{ - uint32_t *code = pc->emit; - int n; - - NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size); - - for (n = 0; n < pc->num_blocks; ++n) { - struct nv_instruction *i; - struct nv_basic_block *b = pc->bb_list[n]; - - for (i = b->entry; i; i = i->next) { - nv50_emit_instruction(pc, i); - - pc->bin_pos += 1 + (pc->emit[0] & 1); - pc->emit += 1 + (pc->emit[0] & 1); - } - } - assert(pc->emit == &code[pc->bin_size / 4]); - - /* XXX: we can do better than this ... */ - if (!pc->bin_size || - !(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) { - pc->emit[0] = 0xf0000001; - pc->emit[1] = 0xe0000000; - pc->bin_size += 8; - } - - pc->emit = code; - code[pc->bin_size / 4 - 1] |= 1; - -#if NV50_DEBUG & NV50_DEBUG_SHADER - nvcg_show_bincode(pc); -#endif - - return 0; -} - -int -nv50_generate_code(struct nv50_translation_info *ti) -{ - struct nv_pc *pc; - int ret; - int i; - - pc = CALLOC_STRUCT(nv_pc); - if (!pc) - return 1; - - pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0])); - if (!pc->root) { - FREE(pc); - return 1; - } - pc->num_subroutines = ti->subr_nr; - - ret = nv50_tgsi_to_nc(pc, ti); - if (ret) - goto out; -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - nv_print_program(pc); -#endif - - pc->opt_reload_elim = ti->store_to_memory ? 
FALSE : TRUE; - - /* optimization */ - ret = nv_pc_exec_pass0(pc); - if (ret) - goto out; -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - nv_print_program(pc); -#endif - - /* register allocation */ - ret = nv_pc_exec_pass1(pc); - if (ret) - goto out; -#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW - nv_print_program(pc); - nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0); -#endif - - /* prepare for emission */ - ret = nv_pc_exec_pass2(pc); - if (ret) - goto out; - assert(!(pc->bin_size % 8)); - - pc->emit = CALLOC(pc->bin_size / 4 + 2, 4); - if (!pc->emit) { - ret = 3; - goto out; - } - ret = nv50_emit_program(pc); - if (ret) - goto out; - - ti->p->code_size = pc->bin_size; - ti->p->code = pc->emit; - - ti->p->immd_size = pc->immd_count * 4; - ti->p->immd = pc->immd_buf; - - /* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */ - ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1); - - ti->p->fixups = pc->fixups; - ti->p->num_fixups = pc->num_fixups; - - ti->p->uses_lmem = ti->store_to_memory; - - NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? 
"failed" : "success"); - -out: - nv_pc_free_refs(pc); - - for (i = 0; i < pc->num_blocks; ++i) - FREE(pc->bb_list[i]); - if (pc->root) - FREE(pc->root); - if (ret) { /* on success, these will be referenced by nv50_program */ - if (pc->emit) - FREE(pc->emit); - if (pc->immd_buf) - FREE(pc->immd_buf); - if (pc->fixups) - FREE(pc->fixups); - } - FREE(pc); - return ret; -} - -static void -nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i) -{ - if (!b->phi) { - i->prev = NULL; - b->phi = i; - i->next = b->entry; - if (b->entry) { - assert(!b->entry->prev && b->exit); - b->entry->prev = i; - } else { - b->entry = i; - b->exit = i; - } - } else { - assert(b->entry); - if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */ - assert(b->entry == b->exit); - b->entry->next = i; - i->prev = b->entry; - b->entry = i; - b->exit = i; - } else { /* insert before entry */ - assert(b->entry->prev && b->exit); - i->next = b->entry; - i->prev = b->entry->prev; - b->entry->prev = i; - i->prev->next = i; - } - } -} - -void -nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i) -{ - if (i->opcode == NV_OP_PHI) { - nvbb_insert_phi(b, i); - } else { - i->prev = b->exit; - if (b->exit) - b->exit->next = i; - b->exit = i; - if (!b->entry) - b->entry = i; - else - if (i->prev && i->prev->opcode == NV_OP_PHI) - b->entry = i; - } - - i->bb = b; - b->num_instructions++; - - if (i->prev && i->prev->is_terminator) - nv_nvi_permute(i->prev, i); -} - -void -nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni) -{ - if (!at->next) { - nvbb_insert_tail(at->bb, ni); - return; - } - ni->next = at->next; - ni->prev = at; - ni->next->prev = ni; - ni->prev->next = ni; -} - -void -nv_nvi_delete(struct nv_instruction *nvi) -{ - struct nv_basic_block *b = nvi->bb; - int j; - - /* debug_printf("REM: "); nv_print_instruction(nvi); */ - - for (j = 0; j < 5; ++j) - nv_reference(NULL, &nvi->src[j], NULL); - nv_reference(NULL, &nvi->flags_src, NULL); - - if 
(nvi->next) - nvi->next->prev = nvi->prev; - else { - assert(nvi == b->exit); - b->exit = nvi->prev; - } - - if (nvi->prev) - nvi->prev->next = nvi->next; - - if (nvi == b->entry) { - /* PHIs don't get hooked to b->entry */ - b->entry = nvi->next; - assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); - } - - if (nvi == b->phi) { - if (nvi->opcode != NV_OP_PHI) - NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n"); - - assert(!nvi->prev); - if (!nvi->next || nvi->next->opcode != NV_OP_PHI) - b->phi = NULL; - else - b->phi = nvi->next; - } -} - -void -nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2) -{ - struct nv_basic_block *b = i1->bb; - - assert(i1->opcode != NV_OP_PHI && - i2->opcode != NV_OP_PHI); - assert(i1->next == i2); - - if (b->exit == i2) - b->exit = i1; - - if (b->entry == i1) - b->entry = i2; - - i2->prev = i1->prev; - i1->next = i2->next; - i2->next = i1; - i1->prev = i2; - - if (i2->prev) - i2->prev->next = i2; - if (i1->next) - i1->next->prev = i1; -} - -void -nvbb_attach_block(struct nv_basic_block *parent, - struct nv_basic_block *b, ubyte edge_kind) -{ - assert(b->num_in < 8); - - if (parent->out[0]) { - assert(!parent->out[1]); - parent->out[1] = b; - parent->out_kind[1] = edge_kind; - } else { - parent->out[0] = b; - parent->out_kind[0] = edge_kind; - } - - b->in[b->num_in] = parent; - b->in_kind[b->num_in++] = edge_kind; -} - -/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ - -boolean -nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) -{ - int j; - - if (b == d) - return TRUE; - - for (j = 0; j < b->num_in; ++j) - if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d)) - return FALSE; - - return j ? 
TRUE : FALSE; -} - -/* check if @bf (future) can be reached from @bp (past), stop at @bt */ -boolean -nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, - struct nv_basic_block *bt) -{ - struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; - int i, p, n; - - p = 0; - n = 1; - q[0] = bp; - - while (p < n) { - b = q[p++]; - - if (b == bf) - break; - if (b == bt) - continue; - assert(n <= (1024 - 2)); - - for (i = 0; i < 2; ++i) { - if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { - q[n] = b->out[i]; - q[n++]->priv = 1; - } - } - } - for (--n; n >= 0; --n) - q[n]->priv = 0; - - return (b == bf); -} - -static struct nv_basic_block * -nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) -{ - struct nv_basic_block *out; - int i; - - if (!nvbb_dominated_by(df, b)) { - for (i = 0; i < df->num_in; ++i) { - if (df->in_kind[i] == CFG_EDGE_BACK) - continue; - if (nvbb_dominated_by(df->in[i], b)) - return df; - } - } - for (i = 0; i < 2 && df->out[i]; ++i) { - if (df->out_kind[i] == CFG_EDGE_BACK) - continue; - if ((out = nvbb_find_dom_frontier(b, df->out[i]))) - return out; - } - return NULL; -} - -struct nv_basic_block * -nvbb_dom_frontier(struct nv_basic_block *b) -{ - struct nv_basic_block *df; - int i; - - for (i = 0; i < 2 && b->out[i]; ++i) - if ((df = nvbb_find_dom_frontier(b, b->out[i]))) - return df; - return NULL; -} diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h deleted file mode 100644 index 9abefa2f2aa..00000000000 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of 
the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NV50_COMPILER_H__ -#define __NV50_COMPILER_H__ - -#include "nv50_debug.h" - -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" - -#define NV_OP_PHI 0 -#define NV_OP_EXTRACT 1 -#define NV_OP_COMBINE 2 -#define NV_OP_LDA 3 -#define NV_OP_STA 4 -#define NV_OP_MOV 5 -#define NV_OP_ADD 6 -#define NV_OP_SUB 7 -#define NV_OP_NEG 8 -#define NV_OP_MUL 9 -#define NV_OP_MAD 10 -#define NV_OP_CVT 11 -#define NV_OP_SAT 12 -#define NV_OP_NOT 13 -#define NV_OP_AND 14 -#define NV_OP_OR 15 -#define NV_OP_XOR 16 -#define NV_OP_SHL 17 -#define NV_OP_SHR 18 -#define NV_OP_RCP 19 -#define NV_OP_UNDEF 20 -#define NV_OP_RSQ 21 -#define NV_OP_LG2 22 -#define NV_OP_SIN 23 -#define NV_OP_COS 24 -#define NV_OP_EX2 25 -#define NV_OP_PRESIN 26 -#define NV_OP_PREEX2 27 -#define NV_OP_MIN 28 -#define NV_OP_MAX 29 -#define NV_OP_SET 30 -#define NV_OP_SAD 31 -#define NV_OP_KIL 32 -#define NV_OP_BRA 33 -#define NV_OP_CALL 34 -#define NV_OP_RET 35 -#define NV_OP_BREAK 36 -#define NV_OP_BREAKADDR 37 -#define NV_OP_JOINAT 38 -#define NV_OP_TEX 39 -#define NV_OP_TXB 40 -#define NV_OP_TXL 41 -#define NV_OP_TXF 42 -#define NV_OP_TXQ 43 -#define NV_OP_DFDX 44 -#define NV_OP_DFDY 45 -#define NV_OP_QUADOP 46 -#define NV_OP_LINTERP 47 
-#define NV_OP_PINTERP 48 -#define NV_OP_ABS 49 -#define NV_OP_CEIL 50 -#define NV_OP_FLOOR 51 -#define NV_OP_TRUNC 52 -#define NV_OP_NOP 53 -#define NV_OP_SELECT 54 -#define NV_OP_EXPORT 55 -#define NV_OP_JOIN 56 -#define NV_OP_ROUND 57 -#define NV_OP_COUNT 58 - -#define NV_FILE_GPR 0 -#define NV_FILE_OUT 1 -#define NV_FILE_ADDR 2 -#define NV_FILE_FLAGS 3 -#define NV_FILE_IMM 16 -#define NV_FILE_MEM_S 32 -#define NV_FILE_MEM_P 33 -#define NV_FILE_MEM_V 34 -#define NV_FILE_MEM_L 48 -#define NV_FILE_MEM_G(i) (64 + i) -#define NV_FILE_MEM_C(i) (80 + i) - -#define NV_MOD_NEG 1 -#define NV_MOD_ABS 2 -#define NV_MOD_NOT 4 -#define NV_MOD_SAT 8 - -#define NV_TYPE_U8 0x00 -#define NV_TYPE_S8 0x01 -#define NV_TYPE_U16 0x02 -#define NV_TYPE_S16 0x03 -#define NV_TYPE_U32 0x04 -#define NV_TYPE_S32 0x05 -#define NV_TYPE_P32 0x07 -#define NV_TYPE_F32 0x09 -#define NV_TYPE_F64 0x0b -#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4)) -#define NV_TYPE_LO 0x00 -#define NV_TYPE_HI 0x80 -#define NV_TYPE_ANY 0xff - -#define NV_TYPE_ISINT(t) ((t) <= 5) -#define NV_TYPE_ISFLT(t) ((t) & 0x08) - -/* $cX registers contain 4 bits: OCSZ (Z is bit 0) */ -#define NV_CC_FL 0x0 -#define NV_CC_LT 0x1 -#define NV_CC_EQ 0x2 -#define NV_CC_LE 0x3 -#define NV_CC_GT 0x4 -#define NV_CC_NE 0x5 -#define NV_CC_GE 0x6 -#define NV_CC_U 0x8 -#define NV_CC_TR 0xf -#define NV_CC_O 0x10 -#define NV_CC_C 0x11 -#define NV_CC_A 0x12 -#define NV_CC_S 0x13 - -#define NV_PC_MAX_INSTRUCTIONS 2048 -#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) - -#define NV_PC_MAX_BASIC_BLOCKS 1024 - -static INLINE boolean -nv_is_vector_op(uint opcode) -{ - return (opcode >= NV_OP_TEX) && (opcode <= NV_OP_TXQ); -} - -static INLINE uint -nv_type_order(ubyte type) -{ - switch (type & 0xf) { - case NV_TYPE_U8: - case NV_TYPE_S8: - return 0; - case NV_TYPE_U16: - case NV_TYPE_S16: - return 1; - case NV_TYPE_U32: - case NV_TYPE_F32: - case NV_TYPE_S32: - case NV_TYPE_P32: - return 2; - case NV_TYPE_F64: - return 3; - } - assert(0); 
- return 0; -} - -static INLINE uint -nv_type_sizeof(ubyte type) -{ - if (type & 0xf0) - return (1 << nv_type_order(type)) * (type >> 4); - return 1 << nv_type_order(type); -} - -static INLINE uint -nv_type_sizeof_base(ubyte type) -{ - return 1 << nv_type_order(type); -} - -struct nv_reg { - int id; - ubyte file; - ubyte type; /* type of generating instruction's result */ - ubyte as_type; /* default type for new references to this value */ - union { - float f32; - double f64; - int32_t s32; - uint32_t u32; - } imm; -}; - -struct nv_range { - struct nv_range *next; - int bgn; - int end; -}; - -struct nv_value { - struct nv_reg reg; - struct nv_instruction *insn; - struct nv_value *join; - int n; - struct nv_range *livei; - int refc; - - struct nv_value *next; - struct nv_value *prev; -}; - -struct nv_ref { - struct nv_value *value; - ubyte mod; - ubyte typecast; - ubyte flags; /* not used yet */ -}; - -#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) - -struct nv_basic_block; - -struct nv_instruction { - struct nv_instruction *next; - struct nv_instruction *prev; - uint opcode; - int serial; - struct nv_value *def[4]; - struct nv_value *flags_def; - struct nv_ref *src[6]; - struct nv_ref *flags_src; - struct nv_basic_block *bb; - struct nv_basic_block *target; /* target block of control flow insn */ - ubyte cc; - unsigned set_cond : 4; - unsigned fixed : 1; /* don't optimize away */ - unsigned is_terminator : 1; - unsigned is_join : 1; - unsigned is_long : 1; /* for emission */ - /* */ - unsigned saturate : 1; - unsigned centroid : 1; - unsigned flat : 1; - unsigned lanes : 4; - unsigned tex_live : 1; - /* */ - ubyte tex_t; /* TIC binding */ - ubyte tex_s; /* TSC binding */ - unsigned tex_argc : 3; - unsigned tex_cube : 1; - unsigned tex_mask : 4; - /* */ - ubyte quadop; -}; - -static INLINE int -nvi_vector_size(struct nv_instruction *nvi) -{ - int i; - assert(nvi); - for (i = 0; i < 4 && nvi->def[i]; ++i); - return i; -} - -#define CFG_EDGE_FORWARD 0 -#define 
CFG_EDGE_BACK 1 -#define CFG_EDGE_LOOP_ENTER 2 -#define CFG_EDGE_LOOP_LEAVE 4 -#define CFG_EDGE_FAKE 8 - -/* 'WALL' edge means where reachability check doesn't follow */ -/* 'LOOP' edge means just having to do with loops */ -#define IS_LOOP_EDGE(k) ((k) & 7) -#define IS_WALL_EDGE(k) ((k) & 9) - -struct nv_basic_block { - struct nv_instruction *entry; /* first non-phi instruction */ - struct nv_instruction *exit; - struct nv_instruction *phi; /* very first instruction */ - int num_instructions; - - struct nv_basic_block *out[2]; /* no indirect branches -> 2 */ - struct nv_basic_block *in[8]; /* hope that suffices */ - uint num_in; - ubyte out_kind[2]; - ubyte in_kind[8]; - - int id; - int subroutine; - uint priv; /* reset to 0 after you're done */ - uint pass_seq; - - uint32_t bin_pos; /* position, size in emitted code */ - uint32_t bin_size; - - uint32_t live_set[NV_PC_MAX_VALUES / 32]; -}; - -struct nv50_translation_info; - -struct nv_pc { - struct nv_basic_block **root; - struct nv_basic_block *current_block; - struct nv_basic_block *parent_block; - - int loop_nesting_bound; - uint pass_seq; - - struct nv_value values[NV_PC_MAX_VALUES]; - struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS]; - struct nv_ref **refs; - struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS]; - int num_values; - int num_instructions; - int num_refs; - int num_blocks; - int num_subroutines; - - int max_reg[4]; - - uint32_t *immd_buf; /* populated on emit */ - unsigned immd_count; - - uint32_t *emit; - unsigned bin_size; - unsigned bin_pos; - - void *fixups; - unsigned num_fixups; - - /* optimization enables */ - boolean opt_reload_elim; -}; - -void nvbb_insert_tail(struct nv_basic_block *, struct nv_instruction *); -void nvi_insert_after(struct nv_instruction *, struct nv_instruction *); - -static INLINE struct nv_instruction * -nv_alloc_instruction(struct nv_pc *pc, uint opcode) -{ - struct nv_instruction *insn; - - insn = &pc->instructions[pc->num_instructions++]; - 
assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); - - insn->cc = NV_CC_TR; - insn->opcode = opcode; - - return insn; -} - -static INLINE struct nv_instruction * -new_instruction(struct nv_pc *pc, uint opcode) -{ - struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); - - nvbb_insert_tail(pc->current_block, insn); - return insn; -} - -static INLINE struct nv_instruction * -new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode) -{ - struct nv_instruction *insn = nv_alloc_instruction(pc, opcode); - - nvi_insert_after(at, insn); - return insn; -} - -static INLINE struct nv_value * -new_value(struct nv_pc *pc, ubyte file, ubyte type) -{ - struct nv_value *value = &pc->values[pc->num_values]; - - assert(pc->num_values < NV_PC_MAX_VALUES - 1); - - value->n = pc->num_values++; - value->join = value; - value->reg.id = -1; - value->reg.file = file; - value->reg.type = value->reg.as_type = type; - return value; -} - -static INLINE struct nv_value * -new_value_like(struct nv_pc *pc, struct nv_value *like) -{ - struct nv_value *val = new_value(pc, like->reg.file, like->reg.type); - val->reg.as_type = like->reg.as_type; - return val; -} - -static INLINE struct nv_ref * -new_ref(struct nv_pc *pc, struct nv_value *val) -{ - int i; - struct nv_ref *ref; - - if ((pc->num_refs % 64) == 0) { - const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *); - const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *); - - pc->refs = REALLOC(pc->refs, old_size, new_size); - - ref = CALLOC(64, sizeof(struct nv_ref)); - for (i = 0; i < 64; ++i) - pc->refs[pc->num_refs + i] = &ref[i]; - } - - ref = pc->refs[pc->num_refs++]; - ref->value = val; - ref->typecast = val->reg.as_type; - - ++val->refc; - return ref; -} - -static INLINE struct nv_basic_block * -new_basic_block(struct nv_pc *pc) -{ - struct nv_basic_block *bb; - - if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS) - return NULL; - - bb = CALLOC_STRUCT(nv_basic_block); - - bb->id = 
pc->num_blocks; - pc->bb_list[pc->num_blocks++] = bb; - return bb; -} - -static INLINE void -nv_reference(struct nv_pc *pc, struct nv_ref **d, struct nv_value *s) -{ - if (*d) - --(*d)->value->refc; - - if (s) { - if (!*d) - *d = new_ref(pc, s); - else { - (*d)->value = s; - ++(s->refc); - } - } else { - *d = NULL; - } -} - -/* nv50_emit.c */ -void nv50_emit_instruction(struct nv_pc *, struct nv_instruction *); -unsigned nv50_inst_min_size(struct nv_instruction *); - -/* nv50_print.c */ -const char *nv_opcode_name(uint opcode); -void nv_print_instruction(struct nv_instruction *); - -/* nv50_pc.c */ - -void nv_print_function(struct nv_basic_block *root); -void nv_print_program(struct nv_pc *); - -boolean nv_op_commutative(uint opcode); -int nv50_indirect_opnd(struct nv_instruction *); -boolean nv50_nvi_can_use_imm(struct nv_instruction *, int s); -boolean nv50_nvi_can_predicate(struct nv_instruction *); -boolean nv50_nvi_can_load(struct nv_instruction *, int s, struct nv_value *); -boolean nv50_op_can_write_flags(uint opcode); -ubyte nv50_supported_src_mods(uint opcode, int s); -int nv_nvi_refcount(struct nv_instruction *); -void nv_nvi_delete(struct nv_instruction *); -void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *); -void nvbb_attach_block(struct nv_basic_block *parent, - struct nv_basic_block *, ubyte edge_kind); -boolean nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *); -boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *, - struct nv_basic_block *); -struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *); -int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val, - struct nv_value *new_val); -struct nv_value *nvcg_find_immediate(struct nv_ref *); -struct nv_value *nvcg_find_constant(struct nv_ref *); - -typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b); - -void nv_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *); - -int 
nv_pc_exec_pass0(struct nv_pc *pc); -int nv_pc_exec_pass1(struct nv_pc *pc); -int nv_pc_exec_pass2(struct nv_pc *pc); - -int nv50_tgsi_to_nc(struct nv_pc *, struct nv50_translation_info *); - -#endif // NV50_COMPILER_H diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c deleted file mode 100644 index 2b177c6cb2f..00000000000 --- a/src/gallium/drivers/nv50/nv50_pc_emit.c +++ /dev/null @@ -1,1257 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "nv50_context.h" -#include "nv50_pc.h" - -#define FLAGS_CC_SHIFT 7 -#define FLAGS_ID_SHIFT 12 -#define FLAGS_WR_ID_SHIFT 4 -#define FLAGS_CC_MASK (0x1f << FLAGS_CC_SHIFT) -#define FLAGS_ID_MASK (0x03 << FLAGS_ID_SHIFT) -#define FLAGS_WR_EN (1 << 6) -#define FLAGS_WR_ID_MASK (0x3 << FLAGS_WR_ID_SHIFT) - -#define NV50_FIXUP_CODE_RELOC 0 -#define NV50_FIXUP_DATA_RELOC 1 - -struct nv50_fixup { - uint8_t type; - int8_t shift; - uint32_t mask; - uint32_t data; - uint32_t offset; -}; - -void -nv50_relocate_program(struct nv50_program *p, - uint32_t code_base, - uint32_t data_base) -{ - struct nv50_fixup *f = (struct nv50_fixup *)p->fixups; - unsigned i; - - for (i = 0; i < p->num_fixups; ++i) { - uint32_t data; - - switch (f[i].type) { - case NV50_FIXUP_CODE_RELOC: data = code_base + f[i].data; break; - case NV50_FIXUP_DATA_RELOC: data = data_base + f[i].data; break; - default: - data = f[i].data; - break; - } - data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift); - - p->code[f[i].offset / 4] &= ~f[i].mask; - p->code[f[i].offset / 4] |= data & f[i].mask; - } -} - -static void -new_fixup(struct nv_pc *pc, uint8_t ty, int w, uint32_t data, uint32_t m, int s) -{ - struct nv50_fixup *f; - - const unsigned size = sizeof(struct nv50_fixup); - const unsigned n = pc->num_fixups; - - if (!(n % 8)) - pc->fixups = REALLOC(pc->fixups, n * size, (n + 8) * size); - - f = (struct nv50_fixup *)pc->fixups; - - f[n].offset = (pc->bin_pos + w) * 4; - f[n].type = ty; - f[n].data = data; - f[n].mask = m; - f[n].shift = s; - - ++pc->num_fixups; -} - -const ubyte nv50_inst_min_size_tab[NV_OP_COUNT] = -{ - 0, 0, 0, 8, 8, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, /* 15 */ - 8, 8, 8, 4, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 31 */ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 47 */ - 4, 8, 8, 8, 8, 8, 0, 0, 8, 8 -}; - -unsigned -nv50_inst_min_size(struct nv_instruction *i) -{ - int n; - - if (nv50_inst_min_size_tab[i->opcode] > 4) - return 8; - - if (i->def[0] && 
i->def[0]->reg.file != NV_FILE_GPR) - return 8; - if (i->def[0]->join->reg.id > 63) - return 8; - - for (n = 0; n < 3; ++n) { - if (!i->src[n]) - break; - if (i->src[n]->value->reg.file != NV_FILE_GPR && - i->src[n]->value->reg.file != NV_FILE_MEM_V) - return 8; - if (i->src[n]->value->reg.id > 63) - return 8; - } - - if (i->flags_def || i->flags_src || i->src[4]) - return 8; - - if (i->is_join) - return 8; - - if (i->src[2]) { - if (i->saturate || i->src[2]->mod) - return 8; - if (i->src[0]->mod ^ i->src[1]->mod) - return 8; - if ((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS) - return 8; - if (i->def[0]->join->reg.id < 0 || - i->def[0]->join->reg.id != i->src[2]->value->join->reg.id) - return 8; - } - - return nv50_inst_min_size_tab[i->opcode]; -} - -static INLINE ubyte -STYPE(struct nv_instruction *nvi, int s) -{ - return nvi->src[s]->typecast; -} - -static INLINE ubyte -DTYPE(struct nv_instruction *nvi, int d) -{ - return nvi->def[d]->reg.type; -} - -static INLINE struct nv_reg * -SREG(struct nv_ref *ref) -{ - return &ref->value->join->reg; -} - -static INLINE struct nv_reg * -DREG(struct nv_value *val) -{ - return &val->join->reg; -} - -static INLINE ubyte -SFILE(struct nv_instruction *nvi, int s) -{ - return nvi->src[s]->value->reg.file; -} - -static INLINE ubyte -DFILE(struct nv_instruction *nvi, int d) -{ - return nvi->def[0]->reg.file; -} - -static INLINE void -SID(struct nv_pc *pc, struct nv_ref *ref, int pos) -{ - pc->emit[pos / 32] |= SREG(ref)->id << (pos % 32); -} - -static INLINE void -DID(struct nv_pc *pc, struct nv_value *val, int pos) -{ - pc->emit[pos / 32] |= DREG(val)->id << (pos % 32); -} - -static INLINE uint32_t -get_immd_u32(struct nv_ref *ref) -{ - assert(ref->value->reg.file == NV_FILE_IMM); - return ref->value->reg.imm.u32; -} - -static INLINE void -set_immd_u32(struct nv_pc *pc, uint32_t u32) -{ - pc->emit[1] |= 3; - pc->emit[0] |= (u32 & 0x3f) << 16; - pc->emit[1] |= (u32 >> 6) << 2; -} - -static INLINE void -set_immd(struct nv_pc 
*pc, struct nv_ref *ref) -{ - assert(ref->value->reg.file == NV_FILE_IMM); - set_immd_u32(pc, get_immd_u32(ref)); -} - -/* Allocate data in immediate buffer, if we want to load the immediate - * for a constant buffer instead of inlining it into the code. - */ -static void -nv_pc_alloc_immd(struct nv_pc *pc, struct nv_ref *ref) -{ - uint32_t i, val = get_immd_u32(ref); - - for (i = 0; i < pc->immd_count; ++i) - if (pc->immd_buf[i] == val) - break; - - if (i == pc->immd_count) { - if (!(pc->immd_count % 8)) - pc->immd_buf = REALLOC(pc->immd_buf, - pc->immd_count * 4, (pc->immd_count + 8) * 4); - pc->immd_buf[pc->immd_count++] = val; - } - - SREG(ref)->id = i; -} - -static INLINE void -set_pred(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(!(pc->emit[1] & 0x00003f80)); - - pc->emit[1] |= i->cc << 7; - if (i->flags_src) - pc->emit[1] |= SREG(i->flags_src)->id << 12; -} - -static INLINE void -set_pred_wr(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(!(pc->emit[1] & 0x00000070)); - - if (i->flags_def) - pc->emit[1] |= (DREG(i->flags_def)->id << 4) | 0x40; -} - -static INLINE void -set_a16_bits(struct nv_pc *pc, uint id) -{ - ++id; /* $a0 is always 0 */ - pc->emit[0] |= (id & 3) << 26; - pc->emit[1] |= id & 4; -} - -static INLINE void -set_addr(struct nv_pc *pc, struct nv_instruction *i) -{ - if (i->src[4]) - set_a16_bits(pc, SREG(i->src[4])->id); -} - -static void -set_dst(struct nv_pc *pc, struct nv_value *value) -{ - struct nv_reg *reg = &value->join->reg; - - if (reg->id < 0) { - pc->emit[0] |= (127 << 2) | 1; /* set 'long'-bit to catch bugs */ - pc->emit[1] |= 0x8; - return; - } - - if (reg->file == NV_FILE_OUT) - pc->emit[1] |= 0x8; - else - if (reg->file == NV_FILE_ADDR) - assert(0); - - pc->emit[0] |= reg->id << 2; -} - -static void -set_src_0(struct nv_pc *pc, struct nv_ref *ref) -{ - struct nv_reg *reg = SREG(ref); - - if (reg->file == NV_FILE_MEM_S) - pc->emit[1] |= 0x00200000; - else - if (reg->file == NV_FILE_MEM_P) - pc->emit[0] |= 
0x01800000; - else - if (reg->file != NV_FILE_GPR) - NOUVEAU_ERR("invalid src0 register file: %d\n", reg->file); - - assert(reg->id < 128); - pc->emit[0] |= reg->id << 9; -} - -static void -set_src_1(struct nv_pc *pc, struct nv_ref *ref) -{ - struct nv_reg *reg = SREG(ref); - - if (reg->file >= NV_FILE_MEM_C(0) && - reg->file <= NV_FILE_MEM_C(15)) { - assert(!(pc->emit[1] & 0x01800000)); - - pc->emit[0] |= 0x00800000; - pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22; - } else - if (reg->file != NV_FILE_GPR) - NOUVEAU_ERR("invalid src1 register file: %d\n", reg->file); - - assert(reg->id < 128); - pc->emit[0] |= reg->id << 16; -} - -static void -set_src_2(struct nv_pc *pc, struct nv_ref *ref) -{ - struct nv_reg *reg = SREG(ref); - - if (reg->file >= NV_FILE_MEM_C(0) && - reg->file <= NV_FILE_MEM_C(15)) { - assert(!(pc->emit[1] & 0x01800000)); - - pc->emit[0] |= 0x01000000; - pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22; - } else - if (reg->file != NV_FILE_GPR) - NOUVEAU_ERR("invalid src2 register file: %d\n", reg->file); - - assert(reg->id < 128); - pc->emit[1] |= reg->id << 14; -} - -/* the default form: - * - long instruction - * - 1 to 3 sources in slots 0, 1, 2 - * - address & flags - */ -static void -emit_form_MAD(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] |= 1; - - set_pred(pc, i); - set_pred_wr(pc, i); - - if (i->def[0]) - set_dst(pc, i->def[0]); - else { - pc->emit[0] |= 0x01fc; - pc->emit[1] |= 0x0008; - } - - if (i->src[0]) - set_src_0(pc, i->src[0]); - - if (i->src[1]) - set_src_1(pc, i->src[1]); - - if (i->src[2]) - set_src_2(pc, i->src[2]); - - set_addr(pc, i); -} - -/* like default form, but 2nd source in slot 2, no 3rd source */ -static void -emit_form_ADD(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] |= 1; - - if (i->def[0]) - set_dst(pc, i->def[0]); - else { - pc->emit[0] |= 0x01fc; - pc->emit[1] |= 0x0008; - } - - set_pred(pc, i); - set_pred_wr(pc, i); - - if (i->src[0]) - set_src_0(pc, i->src[0]); - - 
if (i->src[1]) - set_src_2(pc, i->src[1]); - - set_addr(pc, i); -} - -/* short mul */ -static void -emit_form_MUL(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(!i->is_long && !(pc->emit[0] & 1)); - - assert(i->def[0]); - set_dst(pc, i->def[0]); - - if (i->src[0]) - set_src_0(pc, i->src[0]); - - if (i->src[1]) - set_src_1(pc, i->src[1]); -} - -/* default immediate form - * - 1 to 3 sources where last is immediate - * - no address or predicate possible - */ -static void -emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask) -{ - pc->emit[0] |= 1; - - assert(i->def[0]); - assert(i->src[0]); - set_dst(pc, i->def[0]); - - assert(!i->src[4] && !i->flags_src && !i->flags_def); - - if (i->src[2]) { - set_immd(pc, i->src[2]); - set_src_0(pc, i->src[1]); - set_src_1(pc, i->src[0]); - } else - if (i->src[1]) { - set_immd(pc, i->src[1]); - set_src_0(pc, i->src[0]); - } else - set_immd(pc, i->src[0]); - - assert(!mod_mask); -} - -static void -set_ld_st_size(struct nv_pc *pc, int s, ubyte type) -{ - switch (type) { - case NV_TYPE_F64: - pc->emit[1] |= 0x8000 << s; - break; - case NV_TYPE_F32: - case NV_TYPE_S32: - case NV_TYPE_U32: - pc->emit[1] |= 0xc000 << s; - break; - case NV_TYPE_S16: - pc->emit[1] |= 0x6000 << s; - break; - case NV_TYPE_U16: - pc->emit[1] |= 0x4000 << s; - break; - case NV_TYPE_S8: - pc->emit[1] |= 0x2000 << s; - break; - default: - break; - } -} - -static void -emit_ld(struct nv_pc *pc, struct nv_instruction *i) -{ - ubyte sf = SFILE(i, 0); - - if (sf == NV_FILE_IMM) { - sf = NV_FILE_MEM_C(0); - nv_pc_alloc_immd(pc, i->src[0]); - - new_fixup(pc, NV50_FIXUP_DATA_RELOC, 0, SREG(i->src[0])->id, 0xffff, 9); - } - - if (sf == NV_FILE_MEM_S || - sf == NV_FILE_MEM_P) { - pc->emit[0] = 0x10000001; - pc->emit[1] = 0x04200000 | (0x3c << 12); - if (sf == NV_FILE_MEM_P) - pc->emit[0] |= 0x01800000; - } else - if (sf >= NV_FILE_MEM_C(0) && - sf <= NV_FILE_MEM_C(15)) { - pc->emit[0] = 0x10000001; - pc->emit[1] = 0x24000000; - pc->emit[1] 
|= (sf - NV_FILE_MEM_C(0)) << 22; - } else - if (sf >= NV_FILE_MEM_G(0) && - sf <= NV_FILE_MEM_G(15)) { - pc->emit[0] = 0xd0000001 | ((sf - NV_FILE_MEM_G(0)) << 16); - pc->emit[1] = 0xa0000000; - - assert(i->src[4] && SREG(i->src[4])->file == NV_FILE_GPR); - SID(pc, i->src[4], 9); - } else - if (sf == NV_FILE_MEM_L) { - pc->emit[0] = 0xd0000001; - pc->emit[1] = 0x40000000; - - set_addr(pc, i); - } else { - NOUVEAU_ERR("invalid ld source file\n"); - abort(); - } - - set_ld_st_size(pc, (sf == NV_FILE_MEM_L) ? 8 : 0, STYPE(i, 0)); - - set_dst(pc, i->def[0]); - set_pred_wr(pc, i); - - set_pred(pc, i); - - if (sf < NV_FILE_MEM_G(0) || - sf > NV_FILE_MEM_G(15)) { - SID(pc, i->src[0], 9); - set_addr(pc, i); - } -} - -static void -emit_st(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(SFILE(i, 1) == NV_FILE_GPR); - assert(SFILE(i, 0) == NV_FILE_MEM_L); - - pc->emit[0] = 0xd0000001; - pc->emit[1] = 0x60000000; - - SID(pc, i->src[1], 2); - SID(pc, i->src[0], 9); - - set_ld_st_size(pc, 8, STYPE(i, 1)); - - set_addr(pc, i); - set_pred(pc, i); -} - -static int -verify_mov(struct nv_instruction *i) -{ - ubyte sf = SFILE(i, 0); - ubyte df = DFILE(i, 0); - - if (df == NV_FILE_GPR) - return 0; - - if (df != NV_FILE_OUT && - df != NV_FILE_FLAGS && - df != NV_FILE_ADDR) - return 1; - - if (sf == NV_FILE_FLAGS) - return 2; - if (sf == NV_FILE_ADDR) - return 3; - if (sf == NV_FILE_IMM && df != NV_FILE_OUT) - return 4; - - return 0; -} - -static void -emit_mov(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(!verify_mov(i)); - - if (SFILE(i, 0) >= NV_FILE_MEM_S) - emit_ld(pc, i); - else - if (SFILE(i, 0) == NV_FILE_FLAGS) { - pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2); - pc->emit[1] = 0x20000780 | (SREG(i->src[0])->id << 12); - } else - if (SFILE(i, 0) == NV_FILE_ADDR) { - pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2); - pc->emit[1] = 0x40000780; - set_a16_bits(pc, SREG(i->src[0])->id); - } else - if (DFILE(i, 0) == NV_FILE_FLAGS) { - pc->emit[0] = 
0x00000001; - pc->emit[1] = 0xa0000000 | (1 << 6); - set_pred(pc, i); - pc->emit[0] |= SREG(i->src[0])->id << 9; - pc->emit[1] |= DREG(i->def[0])->id << 4; - } else - if (SFILE(i, 0) == NV_FILE_IMM) { - if (i->opcode == NV_OP_LDA) { - emit_ld(pc, i); - } else { - pc->emit[0] = 0x10008001; - pc->emit[1] = 0x00000003; - - emit_form_IMM(pc, i, 0); - } - } else { - pc->emit[0] = 0x10000000; - pc->emit[0] |= DREG(i->def[0])->id << 2; - pc->emit[0] |= SREG(i->src[0])->id << 9; - - if (!i->is_long) { - pc->emit[0] |= 0x8000; - } else { - pc->emit[0] |= 0x00000001; - pc->emit[1] = 0x0403c000; - - set_pred(pc, i); - } - } - - if (DFILE(i, 0) == NV_FILE_OUT) - pc->emit[1] |= 0x8; -} - -static void -emit_interp(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x80000000; - - assert(DFILE(i, 0) == NV_FILE_GPR); - assert(SFILE(i, 0) == NV_FILE_MEM_V); - - DID(pc, i->def[0], 2); - SID(pc, i->src[0], 16); - - if (i->flat) - pc->emit[0] |= 1 << 8; - else - if (i->opcode == NV_OP_PINTERP) { - pc->emit[0] |= 1 << 25; - pc->emit[0] |= SREG(i->src[1])->id << 9; - } - - if (i->centroid) - pc->emit[0] |= 1 << 24; - - assert(i->is_long || !i->flags_src); - - if (i->is_long) { - set_pred(pc, i); - - pc->emit[1] |= - (pc->emit[0] & (3 << 24)) >> (24 - 16) | - (pc->emit[0] & (1 << 8)) >> (18 - 8); - - pc->emit[0] |= 1; - pc->emit[0] &= ~0x03000100; - } -} - -static void -emit_minmax(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x30000000; - pc->emit[1] = (i->opcode == NV_OP_MIN) ? 
(2 << 28) : 0; - - switch (DTYPE(i, 0)) { - case NV_TYPE_F32: - pc->emit[0] |= 0x80000000; - pc->emit[1] |= 0x80000000; - break; - case NV_TYPE_S32: - pc->emit[1] |= 0x8c000000; - break; - case NV_TYPE_U32: - pc->emit[1] |= 0x84000000; - break; - } - - emit_form_MAD(pc, i); - - if (i->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000; - if (i->src[1]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00080000; -} - -static void -emit_add_f32(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0xb0000000; - - assert(!((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS)); - - if (SFILE(i, 1) == NV_FILE_IMM) { - emit_form_IMM(pc, i, 0); - - if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000; - if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22; - } else - if (i->is_long) { - emit_form_ADD(pc, i); - - if (i->src[0]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 26; - if (i->src[1]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 27; - - if (i->saturate) - pc->emit[1] |= 0x20000000; - } else { - emit_form_MUL(pc, i); - - if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000; - if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22; - } -} - -static void -emit_add_b32(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0x20008000; - - if (SFILE(i, 1) == NV_FILE_IMM) { - emit_form_IMM(pc, i, 0); - } else - if (i->is_long) { - pc->emit[0] = 0x20000000; - pc->emit[1] = 0x04000000; - emit_form_ADD(pc, i); - } else { - emit_form_MUL(pc, i); - } - - if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 28; - if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22; -} - -static void -emit_add_a16(struct nv_pc *pc, struct nv_instruction *i) -{ - int s = (i->opcode == NV_OP_MOV) ? 
0 : 1; - - pc->emit[0] = 0xd0000001 | ((uint16_t)get_immd_u32(i->src[s]) << 9); - pc->emit[1] = 0x20000000; - - pc->emit[0] |= (DREG(i->def[0])->id + 1) << 2; - - set_pred(pc, i); - - if (s && i->src[0]) - set_a16_bits(pc, SREG(i->src[0])->id); -} - -static void -emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op) -{ - pc->emit[0] = 0x00000003 | (flow_op << 28); - pc->emit[1] = 0x00000000; - - set_pred(pc, i); - - if (i->target && (i->opcode != NV_OP_BREAK)) { - uint32_t pos = i->target->bin_pos; - - new_fixup(pc, NV50_FIXUP_CODE_RELOC, 0, pos, 0xffff << 11, 9); - new_fixup(pc, NV50_FIXUP_CODE_RELOC, 1, pos, 0x3f << 14, -4); - - pc->emit[0] |= ((pos >> 2) & 0xffff) << 11; - pc->emit[1] |= ((pos >> 18) & 0x003f) << 14; - } -} - -static INLINE void -emit_add(struct nv_pc *pc, struct nv_instruction *i) -{ - if (DFILE(i, 0) == NV_FILE_ADDR) - emit_add_a16(pc, i); - else { - switch (DTYPE(i, 0)) { - case NV_TYPE_F32: - emit_add_f32(pc, i); - break; - case NV_TYPE_U32: - case NV_TYPE_S32: - emit_add_b32(pc, i); - break; - } - } -} - -static void -emit_bitop2(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0xd0000000; - - if (SFILE(i, 1) == NV_FILE_IMM) { - emit_form_IMM(pc, i, 0); - - if (i->opcode == NV_OP_OR) - pc->emit[0] |= 0x0100; - else - if (i->opcode == NV_OP_XOR) - pc->emit[0] |= 0x8000; - } else { - emit_form_MAD(pc, i); - - pc->emit[1] |= 0x04000000; - - if (i->opcode == NV_OP_OR) - pc->emit[1] |= 0x4000; - else - if (i->opcode == NV_OP_XOR) - pc->emit[1] |= 0x8000; - } -} - -static void -emit_arl(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(SFILE(i, 0) == NV_FILE_GPR); - assert(SFILE(i, 1) == NV_FILE_IMM); - - assert(!i->flags_def); - - pc->emit[0] = 0x00000001; - pc->emit[1] = 0xc0000000; - - pc->emit[0] |= (i->def[0]->reg.id + 1) << 2; - set_pred(pc, i); - set_src_0(pc, i->src[0]); - pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x3f) << 16; -} - -static void -emit_shift(struct nv_pc *pc, struct nv_instruction *i) -{ - 
if (DFILE(i, 0) == NV_FILE_ADDR) { - emit_arl(pc, i); - return; - } - - pc->emit[0] = 0x30000001; - pc->emit[1] = 0xc4000000; - - if (i->opcode == NV_OP_SHR) - pc->emit[1] |= 1 << 29; - - if (SFILE(i, 1) == NV_FILE_IMM) { - pc->emit[1] |= 1 << 20; - pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x7f) << 16; - - set_pred(pc, i); - } else - emit_form_MAD(pc, i); - - if (STYPE(i, 0) == NV_TYPE_S32) - pc->emit[1] |= 1 << 27; -} - -static void -emit_flop(struct nv_pc *pc, struct nv_instruction *i) -{ - struct nv_ref *src0 = i->src[0]; - - pc->emit[0] = 0x90000000; - - assert(STYPE(i, 0) == NV_TYPE_F32); - assert(SFILE(i, 0) == NV_FILE_GPR); - - if (!i->is_long) { - emit_form_MUL(pc, i); - assert(i->opcode == NV_OP_RCP && !src0->mod); - return; - } - - pc->emit[1] = (i->opcode - NV_OP_RCP) << 29; - - emit_form_MAD(pc, i); - - if (src0->mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000; - if (src0->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000; -} - -static void -emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) -{ - const boolean neg_mul = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG; - const boolean neg_add = (i->src[2]->mod & NV_MOD_NEG); - - pc->emit[0] = 0xe0000000; - - if (!i->is_long) { - emit_form_MUL(pc, i); - assert(!neg_mul && !neg_add); - return; - } - - emit_form_MAD(pc, i); - - if (neg_mul) pc->emit[1] |= 0x04000000; - if (neg_add) pc->emit[1] |= 0x08000000; - - if (i->saturate) - pc->emit[1] |= 0x20000000; -} - -static INLINE void -emit_mad(struct nv_pc *pc, struct nv_instruction *i) -{ - emit_mad_f32(pc, i); -} - -static void -emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) -{ - boolean neg = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG; - - pc->emit[0] = 0xc0000000; - - if (SFILE(i, 1) == NV_FILE_IMM) { - emit_form_IMM(pc, i, 0); - - if (neg) - pc->emit[0] |= 0x8000; - } else - if (i->is_long) { - emit_form_MAD(pc, i); - - if (neg) - pc->emit[1] |= 0x08 << 24; - } else { - emit_form_MUL(pc, i); - - if (neg) - pc->emit[0] |= 0x8000; - } -} - 
-static void -emit_set(struct nv_pc *pc, struct nv_instruction *nvi) -{ - assert(nvi->is_long); - - pc->emit[0] = 0x30000000; - pc->emit[1] = 0x60000000; - - pc->emit[1] |= nvi->set_cond << 14; - - switch (STYPE(nvi, 0)) { - case NV_TYPE_U32: pc->emit[1] |= 0x04000000; break; - case NV_TYPE_S32: pc->emit[1] |= 0x0c000000; break; - case NV_TYPE_F32: pc->emit[0] |= 0x80000000; break; - default: - assert(0); - break; - } - - emit_form_MAD(pc, nvi); -} - -#define CVT_RN (0x00 << 16) -#define CVT_FLOOR (0x02 << 16) -#define CVT_CEIL (0x04 << 16) -#define CVT_TRUNC (0x06 << 16) -#define CVT_SAT (0x08 << 16) -#define CVT_ABS (0x10 << 16) - -#define CVT_X32_X32 0x04004000 -#define CVT_X32_S32 0x04014000 -#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32) -#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32) -#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32) -#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32) -#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32) -#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32) -#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32) -#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32) -#define CVT_U32_U32 ((0x00 << 24) | CVT_X32_X32) - -#define CVT_NEG 0x20000000 -#define CVT_RI 0x08000000 - -static void -emit_cvt(struct nv_pc *pc, struct nv_instruction *nvi) -{ - ubyte dst_type = nvi->def[0] ? 
DTYPE(nvi, 0) : STYPE(nvi, 0); - - pc->emit[0] = 0xa0000000; - - switch (dst_type) { - case NV_TYPE_F32: - switch (STYPE(nvi, 0)) { - case NV_TYPE_F32: pc->emit[1] = CVT_F32_F32; break; - case NV_TYPE_S32: pc->emit[1] = CVT_F32_S32; break; - case NV_TYPE_U32: pc->emit[1] = CVT_F32_U32; break; - } - break; - case NV_TYPE_S32: - switch (STYPE(nvi, 0)) { - case NV_TYPE_F32: pc->emit[1] = CVT_S32_F32; break; - case NV_TYPE_S32: pc->emit[1] = CVT_S32_S32; break; - case NV_TYPE_U32: pc->emit[1] = CVT_S32_U32; break; - } - break; - case NV_TYPE_U32: - switch (STYPE(nvi, 0)) { - case NV_TYPE_F32: pc->emit[1] = CVT_U32_F32; break; - case NV_TYPE_S32: pc->emit[1] = CVT_U32_S32; break; - case NV_TYPE_U32: pc->emit[1] = CVT_U32_U32; break; - } - break; - } - if (pc->emit[1] == CVT_F32_F32 && - (nvi->opcode == NV_OP_CEIL || nvi->opcode == NV_OP_FLOOR || - nvi->opcode == NV_OP_TRUNC || nvi->opcode == NV_OP_ROUND)) - pc->emit[1] |= CVT_RI; - - switch (nvi->opcode) { - case NV_OP_CEIL: pc->emit[1] |= CVT_CEIL; break; - case NV_OP_FLOOR: pc->emit[1] |= CVT_FLOOR; break; - case NV_OP_TRUNC: pc->emit[1] |= CVT_TRUNC; break; - case NV_OP_ROUND: pc->emit[1] |= CVT_RN; break; - - case NV_OP_ABS: pc->emit[1] |= CVT_ABS; break; - case NV_OP_SAT: pc->emit[1] |= CVT_SAT; break; - case NV_OP_NEG: pc->emit[1] |= CVT_NEG; break; - default: - assert(nvi->opcode == NV_OP_CVT); - break; - } - assert(nvi->opcode != NV_OP_ABS || !(nvi->src[0]->mod & NV_MOD_NEG)); - - if (nvi->src[0]->mod & NV_MOD_NEG) pc->emit[1] ^= CVT_NEG; - if (nvi->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= CVT_ABS; - - emit_form_MAD(pc, nvi); -} - -static void -emit_tex(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0xf0000001; - pc->emit[1] = 0x00000000; - - DID(pc, i->def[0], 2); - - set_pred(pc, i); - - pc->emit[0] |= i->tex_t << 9; - pc->emit[0] |= i->tex_s << 17; - - pc->emit[0] |= (i->tex_argc - 1) << 22; - - pc->emit[0] |= (i->tex_mask & 0x3) << 25; - pc->emit[1] |= (i->tex_mask & 0xc) << 12; - - if 
(i->tex_live) - pc->emit[1] |= 4; - - if (i->tex_cube) - pc->emit[0] |= 0x08000000; - - if (i->opcode == NV_OP_TXB) - pc->emit[1] |= 0x20000000; - else - if (i->opcode == NV_OP_TXL) - pc->emit[1] |= 0x40000000; -} - -static void -emit_cvt2fixed(struct nv_pc *pc, struct nv_instruction *i) -{ - ubyte mod = i->src[0]->mod; - - pc->emit[0] = 0xb0000000; - pc->emit[1] = 0xc0000000; - - if (i->opcode == NV_OP_PREEX2) - pc->emit[1] |= 0x4000; - - emit_form_MAD(pc, i); - - if (mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000; - if (mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000; -} - -static void -emit_ddx(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR); - - pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0240001 : 0xc0140001; - pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x86400000 : 0x89800000; - - DID(pc, i->def[0], 2); - SID(pc, i->src[0], 9); - SID(pc, i->src[0], 32 + 14); - - set_pred(pc, i); - set_pred_wr(pc, i); -} - -static void -emit_ddy(struct nv_pc *pc, struct nv_instruction *i) -{ - assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR); - - pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0250001 : 0xc0150001; - pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 
0x85800000 : 0x8a400000; - - DID(pc, i->def[0], 2); - SID(pc, i->src[0], 9); - SID(pc, i->src[0], 32 + 14); - - set_pred(pc, i); - set_pred_wr(pc, i); -} - -static void -emit_quadop(struct nv_pc *pc, struct nv_instruction *i) -{ - pc->emit[0] = 0xc0000000; - pc->emit[1] = 0x80000000; - - emit_form_ADD(pc, i); - - pc->emit[0] |= i->lanes << 16; - - pc->emit[0] |= (i->quadop & 0x03) << 20; - pc->emit[1] |= (i->quadop & 0xfc) << 20; -} - -void -nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) -{ - /* nv_print_instruction(i); */ - - switch (i->opcode) { - case NV_OP_MOV: - if (DFILE(i, 0) == NV_FILE_ADDR) - emit_add_a16(pc, i); - else - emit_mov(pc, i); - break; - case NV_OP_LDA: - emit_mov(pc, i); - break; - case NV_OP_STA: - emit_st(pc, i); - break; - case NV_OP_LINTERP: - case NV_OP_PINTERP: - emit_interp(pc, i); - break; - case NV_OP_ADD: - emit_add(pc, i); - break; - case NV_OP_AND: - case NV_OP_OR: - case NV_OP_XOR: - emit_bitop2(pc, i); - break; - case NV_OP_CVT: - case NV_OP_ABS: - case NV_OP_NEG: - case NV_OP_SAT: - case NV_OP_CEIL: - case NV_OP_FLOOR: - case NV_OP_TRUNC: - case NV_OP_ROUND: - emit_cvt(pc, i); - break; - case NV_OP_DFDX: - emit_ddx(pc, i); - break; - case NV_OP_DFDY: - emit_ddy(pc, i); - break; - case NV_OP_RCP: - case NV_OP_RSQ: - case NV_OP_LG2: - case NV_OP_SIN: - case NV_OP_COS: - case NV_OP_EX2: - emit_flop(pc, i); - break; - case NV_OP_PRESIN: - case NV_OP_PREEX2: - emit_cvt2fixed(pc, i); - break; - case NV_OP_MAD: - emit_mad(pc, i); - break; - case NV_OP_MAX: - case NV_OP_MIN: - emit_minmax(pc, i); - break; - case NV_OP_MUL: - emit_mul_f32(pc, i); - break; - case NV_OP_SET: - emit_set(pc, i); - break; - case NV_OP_SHL: - case NV_OP_SHR: - emit_shift(pc, i); - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - emit_tex(pc, i); - break; - case NV_OP_QUADOP: - emit_quadop(pc, i); - break; - case NV_OP_KIL: - emit_flow(pc, i, 0x0); - break; - case NV_OP_BRA: - emit_flow(pc, i, 0x1); - break; - case NV_OP_CALL: - 
emit_flow(pc, i, 0x2); - break; - case NV_OP_RET: - emit_flow(pc, i, 0x3); - break; - case NV_OP_BREAKADDR: - emit_flow(pc, i, 0x4); - break; - case NV_OP_BREAK: - emit_flow(pc, i, 0x5); - break; - case NV_OP_JOINAT: - emit_flow(pc, i, 0xa); - break; - case NV_OP_NOP: - case NV_OP_JOIN: - pc->emit[0] = 0xf0000001; - pc->emit[1] = 0xe0000000; - break; - case NV_OP_PHI: - case NV_OP_UNDEF: - case NV_OP_SUB: - NOUVEAU_ERR("operation \"%s\" should have been eliminated\n", - nv_opcode_name(i->opcode)); - break; - default: - NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode); - abort(); - break; - } - - if (i->is_join) { - assert(i->is_long && !(pc->emit[1] & 1)); - pc->emit[1] |= 2; - } - - assert((pc->emit[0] & 1) == i->is_long); -} diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c deleted file mode 100644 index 4271731efa7..00000000000 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ /dev/null @@ -1,1161 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "nv50_pc.h" - -#define DESCEND_ARBITRARY(j, f) \ -do { \ - b->pass_seq = ctx->pc->pass_seq; \ - \ - for (j = 0; j < 2; ++j) \ - if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \ - f(ctx, b->out[j]); \ -} while (0) - -extern unsigned nv50_inst_min_size(struct nv_instruction *); - -struct nv_pc_pass { - struct nv_pc *pc; -}; - -static INLINE boolean -values_equal(struct nv_value *a, struct nv_value *b) -{ - /* XXX: sizes */ - return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id); -} - -static INLINE boolean -inst_commutation_check(struct nv_instruction *a, - struct nv_instruction *b) -{ - int si, di; - - for (di = 0; di < 4; ++di) { - if (!a->def[di]) - break; - for (si = 0; si < 5; ++si) { - if (!b->src[si]) - continue; - if (values_equal(a->def[di], b->src[si]->value)) - return FALSE; - } - } - - if (b->flags_src && b->flags_src->value == a->flags_def) - return FALSE; - - return TRUE; -} - -/* Check whether we can swap the order of the instructions, - * where a & b may be either the earlier or the later one. 
/* Per-block callback run right before emission (priv is the nv_pc).
 *
 * It removes a BRA at the end of a previously listed block if it targets b,
 * appends b to pc->bb_list and sets b->bin_pos, deletes no-op instructions,
 * and then decides for every instruction whether it is emitted in short or
 * long form. While the instructions are walked, b->bin_size is counted in
 * 32-bit words; it is converted to bytes (multiplied by 4) at the very end
 * and added to pc->bin_size.
 */
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0; /* n32: length of the current run of short instructions */

   /* find first non-empty block emitted before b */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   for (; j >= 0; --j) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         /* every block listed after 'in' moves up by the removed 8 bytes */
         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;

      if (in->bin_size) /* no more no-op branches to b */
         break;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   /* first sweep: drop instructions that have no effect */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   /* second sweep: choose encodings; a run of short instructions that would
    * have odd length is either extended by swapping a short instruction
    * forward, or its last member is promoted to the long form
    */
   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         nv_nvi_permute(nvi, nvi->next);
         next = nvi; /* revisit nvi, which now follows the swapped instruction */
      } else {
         nvi->is_long = 1;

         /* odd run: the preceding short instruction becomes long as well */
         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      /* the block must end with a long instruction */
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have deleted a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   /* convert the word count to bytes and account for it in the program size */
   pc->bin_size += b->bin_size *= 4;
}
-is_cmem_load(struct nv_instruction *nvi) -{ - return (nvi->opcode == NV_OP_LDA && - nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && - nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15)); -} - -static INLINE boolean -is_smem_load(struct nv_instruction *nvi) -{ - return (nvi->opcode == NV_OP_LDA && - (nvi->src[0]->value->reg.file == NV_FILE_MEM_S || - nvi->src[0]->value->reg.file <= NV_FILE_MEM_P)); -} - -static INLINE boolean -is_immd_move(struct nv_instruction *nvi) -{ - return (nvi->opcode == NV_OP_MOV && - nvi->src[0]->value->reg.file == NV_FILE_IMM); -} - -static INLINE void -check_swap_src_0_1(struct nv_instruction *nvi) -{ - static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; - - struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1]; - - if (!nv_op_commutative(nvi->opcode)) - return; - assert(src0 && src1); - - if (src1->value->reg.file == NV_FILE_IMM) - return; - - if (is_cmem_load(src0->value->insn)) { - if (!is_cmem_load(src1->value->insn)) { - nvi->src[0] = src1; - nvi->src[1] = src0; - /* debug_printf("swapping cmem load to 1\n"); */ - } - } else - if (is_smem_load(src1->value->insn)) { - if (!is_smem_load(src0->value->insn)) { - nvi->src[0] = src1; - nvi->src[1] = src0; - /* debug_printf("swapping smem load to 0\n"); */ - } - } - - if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; -} - -static int -nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi, *sti, *next; - int j; - - for (sti = b->entry; sti; sti = next) { - next = sti->next; - - /* only handling MOV to $oX here */ - if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT) - continue; - if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA) - continue; - - nvi = sti->src[0]->value->insn; - if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode)) - continue; - assert(nvi->def[0] == sti->src[0]->value); - - if (nvi->opcode == NV_OP_SELECT) - 
continue; - if (nvi->def[0]->refc > 1) - continue; - - /* cannot write to $oX when using immediate */ - for (j = 0; j < 4 && nvi->src[j]; ++j) - if (nvi->src[j]->value->reg.file == NV_FILE_IMM || - nvi->src[j]->value->reg.file == NV_FILE_MEM_L) - break; - if (j < 4 && nvi->src[j]) - continue; - - nvi->def[0] = sti->def[0]; - nvi->def[0]->insn = nvi; - nvi->fixed = sti->fixed; - - nv_nvi_delete(sti); - } - DESCEND_ARBITRARY(j, nv_pass_fold_stores); - - return 0; -} - -static int -nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi, *ld; - int j; - - for (nvi = b->entry; nvi; nvi = nvi->next) { - check_swap_src_0_1(nvi); - - for (j = 0; j < 3; ++j) { - if (!nvi->src[j]) - break; - ld = nvi->src[j]->value->insn; - if (!ld) - continue; - - if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) { - nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value); - continue; - } - - if (ld->opcode != NV_OP_LDA) - continue; - if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value)) - continue; - - if (j == 0 && ld->src[4]) /* can't load shared mem */ - continue; - - /* fold it ! */ - nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value); - if (ld->src[4]) - nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value); - - if (!nv_nvi_refcount(ld)) - nv_nvi_delete(ld); - } - } - DESCEND_ARBITRARY(j, nv_pass_fold_loads); - - return 0; -} - -/* NOTE: Assumes loads have not yet been folded. 
*/ -static int -nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) -{ - int j; - struct nv_instruction *nvi, *mi, *next; - ubyte mod; - - for (nvi = b->entry; nvi; nvi = next) { - next = nvi->next; - if (nvi->opcode == NV_OP_SUB) { - nvi->opcode = NV_OP_ADD; - nvi->src[1]->mod ^= NV_MOD_NEG; - } - - for (j = 0; j < 4 && nvi->src[j]; ++j) { - mi = nvi->src[j]->value->insn; - if (!mi) - continue; - if (mi->def[0]->refc > 1) - continue; - - if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG; - else - if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS; - else - continue; - assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); - - mod |= mi->src[0]->mod; - - if (mi->flags_def || mi->flags_src) - continue; - - if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { - /* abs neg [abs] = abs */ - mod &= ~(NV_MOD_NEG | NV_MOD_ABS); - } else - if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { - /* neg as opcode and modifier on same insn cannot occur */ - /* neg neg abs = abs, neg neg = identity */ - assert(j == 0); - if (mod & NV_MOD_ABS) - nvi->opcode = NV_OP_ABS; - else - if (nvi->flags_def) - nvi->opcode = NV_OP_CVT; - else - nvi->opcode = NV_OP_MOV; - mod = 0; - } - - if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod) - continue; - - nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value); - - nvi->src[j]->mod ^= mod; - } - - if (nvi->opcode == NV_OP_SAT) { - mi = nvi->src[0]->value->insn; - - if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD) - continue; - if (mi->flags_def || mi->def[0]->refc > 1) - continue; - - mi->saturate = 1; - mi->def[0] = nvi->def[0]; - mi->def[0]->insn = mi; - if (nvi->flags_def) { - mi->flags_def = nvi->flags_def; - mi->flags_def->insn = mi; - } - nv_nvi_delete(nvi); - } - } - DESCEND_ARBITRARY(j, nv_pass_lower_mods); - - return 0; -} - -#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) - -static void -modifiers_apply(uint32_t *val, ubyte type, ubyte mod) -{ - if (mod & NV_MOD_ABS) { - if (type 
== NV_TYPE_F32) - *val &= 0x7fffffff; - else - if ((*val) & (1 << 31)) - *val = ~(*val) + 1; - } - if (mod & NV_MOD_NEG) { - if (type == NV_TYPE_F32) - *val ^= 0x80000000; - else - *val = ~(*val) + 1; - } -} - -static INLINE uint -modifiers_opcode(ubyte mod) -{ - switch (mod) { - case NV_MOD_NEG: return NV_OP_NEG; - case NV_MOD_ABS: return NV_OP_ABS; - case 0: - return NV_OP_MOV; - default: - return NV_OP_NOP; - } -} - -static void -constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, - struct nv_value *src0, struct nv_value *src1) -{ - struct nv_value *val; - union { - float f32; - uint32_t u32; - int32_t s32; - } u0, u1, u; - ubyte type; - - if (!nvi->def[0]) - return; - type = nvi->def[0]->reg.type; - - u.u32 = 0; - u0.u32 = src0->reg.imm.u32; - u1.u32 = src1->reg.imm.u32; - - modifiers_apply(&u0.u32, type, nvi->src[0]->mod); - modifiers_apply(&u1.u32, type, nvi->src[1]->mod); - - switch (nvi->opcode) { - case NV_OP_MAD: - if (nvi->src[2]->value->reg.file != NV_FILE_GPR) - return; - /* fall through */ - case NV_OP_MUL: - switch (type) { - case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break; - case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break; - case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break; - default: - assert(0); - break; - } - break; - case NV_OP_ADD: - switch (type) { - case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break; - case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break; - case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break; - default: - assert(0); - break; - } - break; - case NV_OP_SUB: - switch (type) { - case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break; - case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break; - case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break; - default: - assert(0); - break; - } - break; - default: - return; - } - - nvi->opcode = NV_OP_MOV; - - val = new_value(pc, NV_FILE_IMM, type); - - val->reg.imm.u32 = u.u32; - - nv_reference(pc, &nvi->src[1], NULL); - nv_reference(pc, &nvi->src[0], val); - - if (nvi->src[2]) { /* from MAD */ - 
nvi->src[1] = nvi->src[0]; - nvi->src[0] = nvi->src[2]; - nvi->src[2] = NULL; - nvi->opcode = NV_OP_ADD; - - if (val->reg.imm.u32 == 0) { - nvi->src[1] = NULL; - nvi->opcode = NV_OP_MOV; - } - } -} - -static void -constant_operand(struct nv_pc *pc, - struct nv_instruction *nvi, struct nv_value *val, int s) -{ - union { - float f32; - uint32_t u32; - int32_t s32; - } u; - int t = s ? 0 : 1; - uint op; - ubyte type; - - if (!nvi->def[0]) - return; - type = nvi->def[0]->reg.type; - - u.u32 = val->reg.imm.u32; - modifiers_apply(&u.u32, type, nvi->src[s]->mod); - - switch (nvi->opcode) { - case NV_OP_MUL: - if ((type == NV_TYPE_F32 && u.f32 == 1.0f) || - (NV_TYPE_ISINT(type) && u.u32 == 1)) { - if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP) - break; - nvi->opcode = op; - nv_reference(pc, &nvi->src[s], NULL); - nvi->src[0] = nvi->src[t]; - nvi->src[1] = NULL; - } else - if ((type == NV_TYPE_F32 && u.f32 == 2.0f) || - (NV_TYPE_ISINT(type) && u.u32 == 2)) { - nvi->opcode = NV_OP_ADD; - nv_reference(pc, &nvi->src[s], nvi->src[t]->value); - nvi->src[s]->mod = nvi->src[t]->mod; - } else - if (type == NV_TYPE_F32 && u.f32 == -1.0f) { - if (nvi->src[t]->mod & NV_MOD_NEG) - nvi->opcode = NV_OP_MOV; - else - nvi->opcode = NV_OP_NEG; - nv_reference(pc, &nvi->src[s], NULL); - nvi->src[0] = nvi->src[t]; - nvi->src[1] = NULL; - } else - if (type == NV_TYPE_F32 && u.f32 == -2.0f) { - nvi->opcode = NV_OP_ADD; - nv_reference(pc, &nvi->src[s], nvi->src[t]->value); - nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG); - } else - if (u.u32 == 0) { - nvi->opcode = NV_OP_MOV; - nv_reference(pc, &nvi->src[t], NULL); - if (s) { - nvi->src[0] = nvi->src[1]; - nvi->src[1] = NULL; - } - } - break; - case NV_OP_ADD: - if (u.u32 == 0) { - if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP) - break; - nvi->opcode = op; - nv_reference(pc, &nvi->src[s], NULL); - nvi->src[0] = nvi->src[t]; - nvi->src[1] = NULL; - } - break; - case NV_OP_RCP: - u.f32 = 1.0f / u.f32; - (val = 
new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; - nvi->opcode = NV_OP_MOV; - assert(s == 0); - nv_reference(pc, &nvi->src[0], val); - break; - case NV_OP_RSQ: - u.f32 = 1.0f / sqrtf(u.f32); - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; - nvi->opcode = NV_OP_MOV; - assert(s == 0); - nv_reference(pc, &nvi->src[0], val); - break; - default: - break; - } - - if (nvi->opcode == NV_OP_MOV && nvi->flags_def) { - struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT); - - nv_reference(pc, &cvt->src[0], nvi->def[0]); - - cvt->flags_def = nvi->flags_def; - nvi->flags_def = NULL; - } -} - -static int -nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi, *next; - int j; - - for (nvi = b->entry; nvi; nvi = next) { - struct nv_value *src0, *src1, *src; - int mod; - - next = nvi->next; - - src0 = nvcg_find_immediate(nvi->src[0]); - src1 = nvcg_find_immediate(nvi->src[1]); - - if (src0 && src1) - constant_expression(ctx->pc, nvi, src0, src1); - else { - if (src0) - constant_operand(ctx->pc, nvi, src0, 0); - else - if (src1) - constant_operand(ctx->pc, nvi, src1, 1); - } - - /* try to combine MUL, ADD into MAD */ - if (nvi->opcode != NV_OP_ADD) - continue; - - src0 = nvi->src[0]->value; - src1 = nvi->src[1]->value; - - if (SRC_IS_MUL(src0) && src0->refc == 1) - src = src0; - else - if (SRC_IS_MUL(src1) && src1->refc == 1) - src = src1; - else - continue; - - /* could have an immediate from above constant_* */ - if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) - continue; - - nvi->opcode = NV_OP_MAD; - mod = nvi->src[(src == src0) ? 0 : 1]->mod; - nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL); - nvi->src[2] = nvi->src[(src == src0) ? 
1 : 0]; - - assert(!(mod & ~NV_MOD_NEG)); - nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); - nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = src->insn->src[0]->mod ^ mod; - nvi->src[1]->mod = src->insn->src[1]->mod; - } - DESCEND_ARBITRARY(j, nv_pass_lower_arith); - - return 0; -} - -/* TODO: redundant store elimination */ - -struct load_record { - struct load_record *next; - uint64_t data[2]; - struct nv_value *value; -}; - -#define LOAD_RECORD_POOL_SIZE 1024 - -struct nv_pass_reld_elim { - struct nv_pc *pc; - - struct load_record *imm; - struct load_record *mem_s; - struct load_record *mem_v; - struct load_record *mem_c[16]; - struct load_record *mem_l; - - struct load_record pool[LOAD_RECORD_POOL_SIZE]; - int alloc; -}; - -/* TODO: properly handle loads from l[] memory in the presence of stores */ -static int -nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) -{ - struct load_record **rec, *it; - struct nv_instruction *ld, *next; - uint64_t data[2]; - struct nv_value *val; - int j; - - for (ld = b->entry; ld; ld = next) { - next = ld->next; - if (!ld->src[0]) - continue; - val = ld->src[0]->value; - rec = NULL; - - if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { - data[0] = val->reg.id; - data[1] = 0; - rec = &ctx->mem_v; - } else - if (ld->opcode == NV_OP_LDA) { - data[0] = val->reg.id; - data[1] = ld->src[4] ? 
ld->src[4]->value->n : ~0ULL; - if (val->reg.file >= NV_FILE_MEM_C(0) && - val->reg.file <= NV_FILE_MEM_C(15)) - rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; - else - if (val->reg.file == NV_FILE_MEM_S) - rec = &ctx->mem_s; - else - if (val->reg.file == NV_FILE_MEM_L) - rec = &ctx->mem_l; - } else - if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { - data[0] = val->reg.imm.u32; - data[1] = 0; - rec = &ctx->imm; - } - - if (!rec || !ld->def[0]->refc) - continue; - - for (it = *rec; it; it = it->next) - if (it->data[0] == data[0] && it->data[1] == data[1]) - break; - - if (it) { - if (ld->def[0]->reg.id >= 0) - it->value = ld->def[0]; - else - if (!ld->fixed) - nvcg_replace_value(ctx->pc, ld->def[0], it->value); - } else { - if (ctx->alloc == LOAD_RECORD_POOL_SIZE) - continue; - it = &ctx->pool[ctx->alloc++]; - it->next = *rec; - it->data[0] = data[0]; - it->data[1] = data[1]; - it->value = ld->def[0]; - *rec = it; - } - } - - ctx->imm = NULL; - ctx->mem_s = NULL; - ctx->mem_v = NULL; - for (j = 0; j < 16; ++j) - ctx->mem_c[j] = NULL; - ctx->mem_l = NULL; - ctx->alloc = 0; - - DESCEND_ARBITRARY(j, nv_pass_reload_elim); - - return 0; -} - -static int -nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b) -{ - int i, c, j; - - for (i = 0; i < ctx->pc->num_instructions; ++i) { - struct nv_instruction *nvi = &ctx->pc->instructions[i]; - struct nv_value *def[4]; - - if (!nv_is_vector_op(nvi->opcode)) - continue; - nvi->tex_mask = 0; - - for (c = 0; c < 4; ++c) { - if (nvi->def[c]->refc) - nvi->tex_mask |= 1 << c; - def[c] = nvi->def[c]; - } - - j = 0; - for (c = 0; c < 4; ++c) - if (nvi->tex_mask & (1 << c)) - nvi->def[j++] = def[c]; - for (c = 0; c < 4; ++c) - if (!(nvi->tex_mask & (1 << c))) - nvi->def[j++] = def[c]; - assert(j == 4); - } - return 0; -} - -struct nv_pass_dce { - struct nv_pc *pc; - uint removed; -}; - -static int -nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) -{ - int j; - struct nv_instruction *nvi, 
*next; - - for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { - next = nvi->next; - - if (inst_cullable(nvi)) { - nv_nvi_delete(nvi); - - ++ctx->removed; - } - } - DESCEND_ARBITRARY(j, nv_pass_dce); - - return 0; -} - -/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. - * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with - * BREAK and dummy ELSE block. - */ -static INLINE boolean -bb_is_if_else_endif(struct nv_basic_block *bb) -{ - if (!bb->out[0] || !bb->out[1]) - return FALSE; - - if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { - return (bb->out[0]->out[1] == bb->out[1]->out[0] && - !bb->out[1]->out[1]); - } else { - return (bb->out[0]->out[0] == bb->out[1]->out[0] && - !bb->out[0]->out[1] && - !bb->out[1]->out[1]); - } -} - -/* predicate instructions and remove branch at the end */ -static void -predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *p, ubyte cc) -{ - struct nv_instruction *nvi; - - if (!b->entry) - return; - for (nvi = b->entry; nvi->next; nvi = nvi->next) { - if (!nvi_isnop(nvi)) { - nvi->cc = cc; - nv_reference(pc, &nvi->flags_src, p); - } - } - - if (nvi->opcode == NV_OP_BRA) - nv_nvi_delete(nvi); - else - if (!nvi_isnop(nvi)) { - nvi->cc = cc; - nv_reference(pc, &nvi->flags_src, p); - } -} - -/* NOTE: Run this after register allocation, we can just cut out the cflow - * instructions and hook the predicates to the conditional OPs if they are - * not using immediates; better than inserting SELECT to join definitions. - * - * NOTE: Should adapt prior optimization to make this possible more often. 
- */ -static int -nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *nvi; - struct nv_value *pred; - int i; - int n0 = 0, n1 = 0; - - if (bb_is_if_else_endif(b)) { - - NV50_DBGMSG(PROG_IR, - "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); - - for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) - if (!nv50_nvi_can_predicate(nvi)) - break; - if (!nvi) { - for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) - if (!nv50_nvi_can_predicate(nvi)) - break; -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - if (nvi) { - debug_printf("cannot predicate: "); nv_print_instruction(nvi); - } - } else { - debug_printf("cannot predicate: "); nv_print_instruction(nvi); -#endif - } - - if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */ - assert(b->exit && b->exit->flags_src); - pred = b->exit->flags_src->value; - - predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U); - predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ); - - assert(b->exit && b->exit->opcode == NV_OP_BRA); - nv_nvi_delete(b->exit); - - if (b->exit && b->exit->opcode == NV_OP_JOINAT) - nv_nvi_delete(b->exit); - - i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; - - if ((nvi = b->out[0]->out[i]->entry)) { - nvi->is_join = 0; - if (nvi->opcode == NV_OP_JOIN) - nv_nvi_delete(nvi); - } - } - } - DESCEND_ARBITRARY(i, nv_pass_flatten); - - return 0; -} - -/* local common subexpression elimination, stupid O(n^2) implementation */ -static int -nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *ir, *ik, *next; - struct nv_instruction *entry = b->phi ? 
b->phi : b->entry; - int s; - unsigned int reps; - - do { - reps = 0; - for (ir = entry; ir; ir = next) { - next = ir->next; - for (ik = entry; ik != ir; ik = ik->next) { - if (ir->opcode != ik->opcode || ir->fixed) - continue; - - if (!ir->def[0] || !ik->def[0] || - ik->opcode == NV_OP_LDA || - ik->opcode == NV_OP_STA || - ik->opcode == NV_OP_MOV || - nv_is_vector_op(ik->opcode)) - continue; /* ignore loads, stores & moves */ - - if (ik->src[4] || ir->src[4]) - continue; /* don't mess with address registers */ - - if (ik->flags_src || ir->flags_src || - ik->flags_def || ir->flags_def) - continue; /* and also not with flags, for now */ - - if (ik->def[0]->reg.file == NV_FILE_OUT || - ir->def[0]->reg.file == NV_FILE_OUT || - !values_equal(ik->def[0], ir->def[0])) - continue; - - for (s = 0; s < 3; ++s) { - struct nv_value *a, *b; - - if (!ik->src[s]) { - if (ir->src[s]) - break; - continue; - } - if (ik->src[s]->mod != ir->src[s]->mod) - break; - a = ik->src[s]->value; - b = ir->src[s]->value; - if (a == b) - continue; - if (a->reg.file != b->reg.file || - a->reg.id < 0 || - a->reg.id != b->reg.id) - break; - } - if (s == 3) { - nv_nvi_delete(ir); - ++reps; - nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]); - break; - } - } - } - } while(reps); - - DESCEND_ARBITRARY(s, nv_pass_cse); - - return 0; -} - -static int -nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) -{ - struct nv_pass_reld_elim *reldelim; - struct nv_pass pass; - struct nv_pass_dce dce; - int ret; - - pass.n = 0; - pass.pc = pc; - - /* Do this first, so we don't have to pay attention - * to whether sources are supported memory loads. 
- */ - pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, root); - if (ret) - return ret; - - pc->pass_seq++; - ret = nv_pass_lower_mods(&pass, root); - if (ret) - return ret; - - pc->pass_seq++; - ret = nv_pass_fold_loads(&pass, root); - if (ret) - return ret; - - pc->pass_seq++; - ret = nv_pass_fold_stores(&pass, root); - if (ret) - return ret; - - if (pc->opt_reload_elim) { - reldelim = CALLOC_STRUCT(nv_pass_reld_elim); - reldelim->pc = pc; - pc->pass_seq++; - ret = nv_pass_reload_elim(reldelim, root); - FREE(reldelim); - if (ret) - return ret; - } - - pc->pass_seq++; - ret = nv_pass_cse(&pass, root); - if (ret) - return ret; - - dce.pc = pc; - do { - dce.removed = 0; - pc->pass_seq++; - ret = nv_pass_dce(&dce, root); - if (ret) - return ret; - } while (dce.removed); - - ret = nv_pass_tex_mask(&pass, root); - if (ret) - return ret; - - return ret; -} - -int -nv_pc_exec_pass0(struct nv_pc *pc) -{ - int i, ret; - - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) - return ret; - return 0; -} diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c deleted file mode 100644 index dabbb836aa4..00000000000 --- a/src/gallium/drivers/nv50/nv50_pc_print.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "nv50_context.h" -#include "nv50_pc.h" - -#define NVXX_DEBUG 0 - -#define PRINT(args...) debug_printf(args) - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) -#endif - -static const char *norm = "\x1b[00m"; -static const char *gree = "\x1b[32m"; -static const char *blue = "\x1b[34m"; -static const char *cyan = "\x1b[36m"; -static const char *orng = "\x1b[33m"; -static const char *mgta = "\x1b[35m"; - -static const char *nv_opcode_names[NV_OP_COUNT + 1] = { - "phi", - "extract", - "combine", - "lda", - "sta", - "mov", - "add", - "sub", - "neg", - "mul", - "mad", - "cvt", - "sat", - "not", - "and", - "or", - "xor", - "shl", - "shr", - "rcp", - "undef", - "rsqrt", - "lg2", - "sin", - "cos", - "ex2", - "presin", - "preex2", - "min", - "max", - "set", - "sad", - "kil", - "bra", - "call", - "ret", - "break", - "breakaddr", - "joinat", - "tex", - "texbias", - "texlod", - "texfetch", - "texsize", - "dfdx", - "dfdy", - "quadop", - "linterp", - "pinterp", - "abs", - "ceil", - "floor", - "trunc", - "nop", - "select", - "export", - "join", - "round", - "BAD_OP" -}; - -static const char *nv_cond_names[] = -{ - "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "", - "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "", - "o", "c", "a", "s" -}; - -static const char *nv_modifier_strings[] = -{ - "", - "neg", - "abs", - "neg abs", - "not", - "not neg" - "not abs", - "not neg abs", - "sat", - "BAD_MOD" -}; - -const char * -nv_opcode_name(uint opcode) -{ - return 
nv_opcode_names[MIN2(opcode, ARRAY_SIZE(nv_opcode_names) - 1)]; -} - -static INLINE const char * -nv_type_name(ubyte type) -{ - switch (type) { - case NV_TYPE_U16: return "u16"; - case NV_TYPE_S16: return "s16"; - case NV_TYPE_F32: return "f32"; - case NV_TYPE_U32: return "u32"; - case NV_TYPE_S32: return "s32"; - case NV_TYPE_P32: return "p32"; - case NV_TYPE_F64: return "f64"; - default: - return "BAD_TYPE"; - } -} - -static INLINE const char * -nv_cond_name(ubyte cc) -{ - return nv_cond_names[MIN2(cc, 19)]; -} - -static INLINE const char * -nv_modifier_string(ubyte mod) -{ - return nv_modifier_strings[MIN2(mod, 9)]; -} - -static INLINE int -nv_value_id(struct nv_value *value) -{ - if (value->join->reg.id >= 0) - return value->join->reg.id; - return value->n; -} - -static INLINE boolean -nv_value_allocated(struct nv_value *value) -{ - return (value->reg.id >= 0) ? TRUE : FALSE; -} - -static INLINE void -nv_print_address(const char c, int buf, struct nv_value *a, int offset) -{ - const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; - - if (buf >= 0) - PRINT(" %s%c%i[", cyan, c, buf); - else - PRINT(" %s%c[", cyan, c); - if (a) - PRINT("%s%ca%i%s+", mgta, ac, nv_value_id(a), cyan); - PRINT("%s0x%x%s]", orng, offset, cyan); -} - -static INLINE void -nv_print_cond(struct nv_instruction *nvi) -{ - char pfx = nv_value_allocated(nvi->flags_src->value->join) ? 
'$' : '%'; - - PRINT("%s%s %s%cc%i ", - gree, nv_cond_name(nvi->cc), - mgta, pfx, nv_value_id(nvi->flags_src->value)); -} - -static INLINE void -nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type) -{ - char reg_pfx = '$'; - - if (type == NV_TYPE_ANY) - type = value->reg.type; - - if (value->reg.file != NV_FILE_FLAGS) - PRINT(" %s%s", gree, nv_type_name(type)); - - if (!nv_value_allocated(value->join)) - reg_pfx = '%'; - - switch (value->reg.file) { - case NV_FILE_GPR: - PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value)); - break; - case NV_FILE_OUT: - PRINT(" %s%co%i", mgta, reg_pfx, nv_value_id(value)); - break; - case NV_FILE_ADDR: - PRINT(" %s%ca%i", mgta, reg_pfx, nv_value_id(value)); - break; - case NV_FILE_FLAGS: - PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value)); - break; - case NV_FILE_MEM_L: - nv_print_address('l', -1, ind, nv_value_id(value)); - break; - case NV_FILE_MEM_S: - nv_print_address('s', -1, ind, 4 * nv_value_id(value)); - break; - case NV_FILE_MEM_P: - nv_print_address('p', -1, ind, 4 * nv_value_id(value)); - break; - case NV_FILE_MEM_V: - nv_print_address('v', -1, ind, 4 * nv_value_id(value)); - break; - case NV_FILE_IMM: - switch (type) { - case NV_TYPE_U16: - case NV_TYPE_S16: - PRINT(" %s0x%04x", orng, value->reg.imm.u32); - break; - case NV_TYPE_F32: - PRINT(" %s%f", orng, value->reg.imm.f32); - break; - case NV_TYPE_F64: - PRINT(" %s%f", orng, value->reg.imm.f64); - break; - case NV_TYPE_U32: - case NV_TYPE_S32: - case NV_TYPE_P32: - PRINT(" %s0x%08x", orng, value->reg.imm.u32); - break; - } - break; - default: - if (value->reg.file >= NV_FILE_MEM_G(0) && - value->reg.file <= NV_FILE_MEM_G(15)) - nv_print_address('g', value->reg.file - NV_FILE_MEM_G(0), ind, - nv_value_id(value) * 4); - else - if (value->reg.file >= NV_FILE_MEM_C(0) && - value->reg.file <= NV_FILE_MEM_C(15)) - nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), ind, - nv_value_id(value) * 4); - else - NOUVEAU_ERR(" BAD_FILE[%i]", 
nv_value_id(value)); - break; - } -} - -static INLINE void -nv_print_ref(struct nv_ref *ref, struct nv_value *ind) -{ - nv_print_value(ref->value, ind, ref->typecast); -} - -void -nv_print_instruction(struct nv_instruction *i) -{ - int j; - - PRINT("%i: ", i->serial); - - if (i->flags_src) - nv_print_cond(i); - - PRINT("%s", gree); - if (i->opcode == NV_OP_SET) - PRINT("set %s", nv_cond_name(i->set_cond)); - else - if (i->saturate) - PRINT("sat %s", nv_opcode_name(i->opcode)); - else - PRINT("%s", nv_opcode_name(i->opcode)); - - if (i->flags_def) - nv_print_value(i->flags_def, NULL, NV_TYPE_ANY); - - /* Only STORE & STA can write to MEM, and they do not def - * anything, so the address is thus part of the source. - */ - if (i->def[0]) - nv_print_value(i->def[0], NULL, NV_TYPE_ANY); - else - if (i->target) - PRINT(" %s(BB:%i)", orng, i->target->id); - else - PRINT(" #"); - - for (j = 0; j < 4; ++j) { - if (!i->src[j]) - continue; - - if (i->src[j]->mod) - PRINT(" %s%s", gree, nv_modifier_string(i->src[j]->mod)); - - nv_print_ref(i->src[j], - (j == nv50_indirect_opnd(i)) ? - i->src[4]->value : NULL); - } - PRINT(" %s%c\n", norm, i->is_long ? 
'l' : 's'); -} diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c deleted file mode 100644 index 12a59cb6cbd..00000000000 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#if NV50_DEBUG & NV50_DEBUG_PROG_RA -# define NV50_RA_DEBUG_LIVEI -# define NV50_RA_DEBUG_LIVE_SETS -# define NV50_RA_DEBUG_JOIN -#endif - -#include "nv50_context.h" -#include "nv50_pc.h" - -#include "util/u_simple_list.h" - -#define NUM_REGISTER_FILES 4 -#define MAX_REGISTER_COUNT 256 - -struct register_set { - struct nv_pc *pc; - - uint32_t last[NUM_REGISTER_FILES]; - uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32]; -}; - -/* using OR because a set bit means occupied/unavailable, aliasing is allowed */ -static void -intersect_register_sets(struct register_set *dst, - struct register_set *src1, struct register_set *src2) -{ - int i, j; - - for (i = 0; i < NUM_REGISTER_FILES; ++i) { - for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) - dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j]; - } -} - -static void -mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) -{ - int i, j; - - for (i = 0; i < NUM_REGISTER_FILES; ++i) { - for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) - set->bits[i][j] = (set->bits[i][j] | mask) & umask; - } -} - -struct nv_pc_pass { - struct nv_pc *pc; - - struct nv_instruction **insns; - int num_insns; - - uint pass_seq; -}; - -static void -ranges_coalesce(struct nv_range *range) -{ - while (range->next && range->end >= range->next->bgn) { - struct nv_range *rnn = range->next->next; - assert(range->bgn <= range->next->bgn); - range->end = MAX2(range->end, range->next->end); - FREE(range->next); - range->next = rnn; - } -} - -/* @return: TRUE if @new_range can be freed (i.e. 
was not reused) */ -static boolean -add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) -{ - struct nv_range *range, **nextp = &val->livei; - - if (bgn == end) /* [a, a) is invalid / empty */ - return TRUE; - - for (range = val->livei; range; range = range->next) { - if (end < range->bgn) - break; /* insert before */ - - if (bgn > range->end) { - nextp = &range->next; - continue; /* insert after */ - } - - /* overlap */ - if (bgn < range->bgn) { - range->bgn = bgn; - if (end > range->end) - range->end = end; - ranges_coalesce(range); - return TRUE; - } - if (end > range->end) { - range->end = end; - ranges_coalesce(range); - return TRUE; - } - assert(bgn >= range->bgn); - assert(end <= range->end); - return TRUE; - } - - if (!new_range) - new_range = CALLOC_STRUCT(nv_range); - - new_range->bgn = bgn; - new_range->end = end; - new_range->next = range; - *(nextp) = new_range; - return FALSE; -} - -static void -add_range(struct nv_value *val, struct nv_basic_block *b, int end) -{ - int bgn; - - if (!val->insn) /* ignore non-def values */ - return; - assert(b->entry->serial <= b->exit->serial); - assert(b->phi->serial <= end); - assert(b->exit->serial + 1 >= end); - - bgn = val->insn->serial; - if (bgn < b->entry->serial || bgn > b->exit->serial) - bgn = b->entry->serial; - - assert(bgn <= end); - - add_range_ex(val, bgn, end, NULL); -} - -#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI) -static void -livei_print(struct nv_value *a) -{ - struct nv_range *r = a->livei; - - debug_printf("livei %i: ", a->n); - while (r) { - debug_printf("[%i, %i) ", r->bgn, r->end); - r = r->next; - } - debug_printf("\n"); -} -#endif - -static void -livei_unify(struct nv_value *dst, struct nv_value *src) -{ - struct nv_range *range, *next; - - for (range = src->livei; range; range = next) { - next = range->next; - if (add_range_ex(dst, range->bgn, range->end, range)) - FREE(range); - } - src->livei = NULL; -} - -static void -livei_release(struct 
nv_value *val) -{ - struct nv_range *range, *next; - - for (range = val->livei; range; range = next) { - next = range->next; - FREE(range); - } -} - -static boolean -livei_have_overlap(struct nv_value *a, struct nv_value *b) -{ - struct nv_range *r_a, *r_b; - - for (r_a = a->livei; r_a; r_a = r_a->next) { - for (r_b = b->livei; r_b; r_b = r_b->next) { - if (r_b->bgn < r_a->end && - r_b->end > r_a->bgn) - return TRUE; - } - } - return FALSE; -} - -static int -livei_end(struct nv_value *a) -{ - struct nv_range *r = a->livei; - - assert(r); - while (r->next) - r = r->next; - return r->end; -} - -static boolean -livei_contains(struct nv_value *a, int pos) -{ - struct nv_range *r; - - for (r = a->livei; r && r->bgn <= pos; r = r->next) - if (r->end > pos) - return TRUE; - return FALSE; -} - -static boolean -reg_assign(struct register_set *set, struct nv_value **def, int n) -{ - int i, id, s; - uint m; - int f = def[0]->reg.file; - - s = n << (nv_type_order(def[0]->reg.type) - 1); - m = (1 << s) - 1; - - id = set->last[f]; - - for (i = 0; i * 32 < set->last[f]; ++i) { - if (set->bits[f][i] == 0xffffffff) - continue; - - for (id = 0; id < 32; id += s) - if (!(set->bits[f][i] & (m << id))) - break; - if (id < 32) - break; - } - if (i * 32 + id > set->last[f]) - return FALSE; - - set->bits[f][i] |= m << id; - - id += i * 32; - - set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1); - - id >>= nv_type_order(def[0]->reg.type) - 1; - - for (i = 0; i < n; ++i) - if (def[i]->livei) - def[i]->reg.id = id++; - - return TRUE; -} - -static INLINE void -reg_occupy(struct register_set *set, struct nv_value *val) -{ - int s, id = val->reg.id, f = val->reg.file; - uint m; - - if (id < 0) - return; - s = nv_type_order(val->reg.type) - 1; - id <<= s; - m = (1 << (1 << s)) - 1; - - assert(s >= 0); /* XXX: remove me */ - - set->bits[f][id / 32] |= m << (id % 32); - - if (set->pc->max_reg[f] < id) - set->pc->max_reg[f] = id; -} - -static INLINE void -reg_release(struct register_set 
*set, struct nv_value *val) -{ - int s, id = val->reg.id, f = val->reg.file; - uint m; - - if (id < 0) - return; - - s = nv_type_order(val->reg.type) - 1; - id <<= s; - m = (1 << (1 << s)) - 1; - - set->bits[f][id / 32] &= ~(m << (id % 32)); -} - -static INLINE boolean -join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) -{ - int i; - struct nv_value *val; - - if (a->reg.file != b->reg.file || - nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type)) - return FALSE; - - if (a->join->reg.id == b->join->reg.id) - return TRUE; - - /* either a or b or both have been assigned */ - - if (a->join->reg.id >= 0 && b->join->reg.id >= 0) - return FALSE; - else - if (b->join->reg.id >= 0) { - val = a; - a = b; - b = val; - } - - for (i = 0; i < ctx->pc->num_values; ++i) { - val = &ctx->pc->values[i]; - - if (val->join->reg.id != a->join->reg.id) - continue; - if (val->join != a->join && livei_have_overlap(val->join, b->join)) - return FALSE; - } - return TRUE; -} - -static INLINE void -do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) -{ - int j; - struct nv_value *bjoin = b->join; - - if (b->join->reg.id >= 0) - a->join->reg.id = b->join->reg.id; - - livei_unify(a->join, b->join); - -#ifdef NV50_RA_DEBUG_JOIN - debug_printf("joining %i to %i\n", b->n, a->n); -#endif - - /* make a->join the new representative */ - for (j = 0; j < ctx->pc->num_values; ++j) - if (ctx->pc->values[j].join == bjoin) - ctx->pc->values[j].join = a->join; - - assert(b->join == a->join); -} - -static INLINE boolean -try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) -{ - if (!join_allowed(ctx, a, b)) { -#ifdef NV50_RA_DEBUG_JOIN - debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); -#endif - return FALSE; - } - if (livei_have_overlap(a->join, b->join)) { -#ifdef NV50_RA_DEBUG_JOIN - debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n); - livei_print(a); - livei_print(b); -#endif - return 
FALSE; - } - - do_join_values(ctx, a, b); - - return TRUE; -} - -static void -join_values_nofail(struct nv_pc_pass *ctx, - struct nv_value *a, struct nv_value *b, boolean type_only) -{ - if (type_only) { - assert(join_allowed(ctx, a, b)); - do_join_values(ctx, a, b); - } else { - boolean ok = try_join_values(ctx, a, b); - if (!ok) { - NOUVEAU_ERR("failed to coalesce values\n"); - } - } -} - -static INLINE boolean -need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) -{ - int i = 0, n = 0; - - for (; i < 2; ++i) - if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i])) - ++n; - - return (b->num_in > 1) && (n == 2); -} - -/* Look for the @phi's operand whose definition reaches @b. */ -static int -phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, - struct nv_basic_block *tb) -{ - struct nv_ref *srci, *srcj; - int i, j; - - for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { - assert(i < Elements(phi->src)); - srci = phi->src[i]; - /* if already replaced, check with original source first */ - if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) - srci = srci->value->insn->src[0]; - if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL)) - continue; - /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb, - srci->value->insn->bb, NULL)) { - j = i; - srcj = srci; - } - } - if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL)) - if (!nvbb_reachable_by(srcj->value->insn->bb, - phi->def[0]->insn->bb, NULL)) - j = -1; - return j; -} - -/* For each operand of each PHI in b, generate a new value by inserting a MOV - * at the end of the block it is coming from and replace the operand with its - * result. This eliminates liveness conflicts and enables us to let values be - * copied to the right register if such a conflict exists nonetheless. 
- * - * These MOVs are also crucial in making sure the live intervals of phi srces - * are extended until the end of the loop, since they are not included in the - * live-in sets. - */ -static int -pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *i, *ni; - struct nv_value *val; - struct nv_basic_block *p, *pn; - int n, j; - - b->pass_seq = ctx->pc->pass_seq; - - for (n = 0; n < b->num_in; ++n) { - p = pn = b->in[n]; - assert(p); - - if (need_new_else_block(b, p)) { - pn = new_basic_block(ctx->pc); - - if (p->out[0] == b) - p->out[0] = pn; - else - p->out[1] = pn; - - if (p->exit->target == b) /* target to new else-block */ - p->exit->target = pn; - - b->in[n] = pn; - - pn->out[0] = b; - pn->in[0] = p; - pn->num_in = 1; - } - ctx->pc->current_block = pn; - - for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - j = phi_opnd_for_bb(i, p, b); - - if (j < 0) { - val = i->def[0]; - } else { - val = i->src[j]->value; - if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { - j = -1; - /* use original value, we already encountered & replaced it */ - val = val->insn->src[0]->value; - } - } - if (j < 0) /* need an additional source ? 
*/ - for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j); - assert(j < 5); - - ni = new_instruction(ctx->pc, NV_OP_MOV); - - /* TODO: insert instruction at correct position in the first place */ - if (ni->prev && ni->prev->target) - nv_nvi_permute(ni->prev, ni); - - ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type); - ni->def[0]->insn = ni; - ni->src[0] = new_ref(ctx->pc, val); - - nv_reference(ctx->pc, &i->src[j], ni->def[0]); - - i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; - } - - if (pn != p && pn->exit) { - assert(!b->in[!n]->exit || b->in[!n]->exit->is_terminator); - /* insert terminator (branch to ENDIF) in new else block */ - ctx->pc->current_block = pn; - ni = new_instruction(ctx->pc, NV_OP_BRA); - ni->target = b; - ni->is_terminator = 1; - } - } - - for (j = 0; j < 2; ++j) - if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) - pass_generate_phi_movs(ctx, b->out[j]); - - return 0; -} - -#define JOIN_MASK_PHI (1 << 0) -#define JOIN_MASK_SELECT (1 << 1) -#define JOIN_MASK_MOV (1 << 2) -#define JOIN_MASK_TEX (1 << 3) - -static int -pass_join_values(struct nv_pc_pass *ctx, unsigned mask) -{ - int c, n; - - for (n = 0; n < ctx->num_insns; ++n) { - struct nv_instruction *nvi, *i = ctx->insns[n]; - - switch (i->opcode) { - case NV_OP_PHI: - if (!(mask & JOIN_MASK_PHI)) - break; - for (c = 0; c < 5 && i->src[c]; ++c) - join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); - break; - case NV_OP_MOV: - if (!(mask & JOIN_MASK_MOV)) - break; - nvi = i->src[0]->value->join->insn; - if (nvi && !nv_is_vector_op(nvi->opcode)) - try_join_values(ctx, i->def[0], i->src[0]->value); - break; - case NV_OP_SELECT: - if (!(mask & JOIN_MASK_SELECT)) - break; - for (c = 0; c < 5 && i->src[c]; ++c) - join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - if (!(mask & JOIN_MASK_TEX)) - break; - /* This should work without conflicts because we always generate 
- * extra MOVs for the sources of a TEX. - */ - for (c = 0; c < 4 && i->src[c]; ++c) - join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); - break; - default: - break; - } - } - return 0; -} - -/* Order the instructions so that live intervals can be expressed in numbers. */ -static void -pass_order_instructions(void *priv, struct nv_basic_block *b) -{ - struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv; - struct nv_instruction *i; - - b->pass_seq = ctx->pc->pass_seq; - - assert(!b->exit || !b->exit->next); - for (i = b->phi; i; i = i->next) { - i->serial = ctx->num_insns; - ctx->insns[ctx->num_insns++] = i; - } -} - -static void -bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b) -{ -#ifdef NV50_RA_DEBUG_LIVE_SETS - int j; - struct nv_value *val; - - debug_printf("LIVE-INs of BB:%i: ", b->id); - - for (j = 0; j < pc->num_values; ++j) { - if (!(b->live_set[j / 32] & (1 << (j % 32)))) - continue; - val = &pc->values[j]; - if (!val->insn) - continue; - debug_printf("%i ", val->n); - } - debug_printf("\n"); -#endif -} - -static INLINE void -live_set_add(struct nv_basic_block *b, struct nv_value *val) -{ - if (!val->insn) /* don't add non-def values */ - return; - b->live_set[val->n / 32] |= 1 << (val->n % 32); -} - -static INLINE void -live_set_rem(struct nv_basic_block *b, struct nv_value *val) -{ - b->live_set[val->n / 32] &= ~(1 << (val->n % 32)); -} - -static INLINE boolean -live_set_test(struct nv_basic_block *b, struct nv_ref *ref) -{ - int n = ref->value->n; - return b->live_set[n / 32] & (1 << (n % 32)); -} - -/* The live set of a block contains those values that are live immediately - * before the beginning of the block, so do a backwards scan. 
- */ -static int -pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *i; - int j, n, ret = 0; - - if (b->pass_seq >= ctx->pc->pass_seq) - return 0; - b->pass_seq = ctx->pc->pass_seq; - - /* slight hack for undecidedness: set phi = entry if it's undefined */ - if (!b->phi) - b->phi = b->entry; - - for (n = 0; n < 2; ++n) { - if (!b->out[n] || b->out[n] == b) - continue; - ret = pass_build_live_sets(ctx, b->out[n]); - if (ret) - return ret; - - if (n == 0) { - for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) - b->live_set[j] = b->out[n]->live_set[j]; - } else { - for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j) - b->live_set[j] |= b->out[n]->live_set[j]; - } - } - - if (!b->entry) - return 0; - - bb_live_set_print(ctx->pc, b); - - for (i = b->exit; i != b->entry->prev; i = i->prev) { - for (j = 0; j < 4; j++) { - if (!i->def[j]) - break; - live_set_rem(b, i->def[j]); - } - for (j = 0; j < 4; j++) { - if (!i->src[j]) - break; - live_set_add(b, i->src[j]->value); - } - if (i->src[4]) - live_set_add(b, i->src[4]->value); - if (i->flags_def) - live_set_rem(b, i->flags_def); - if (i->flags_src) - live_set_add(b, i->flags_src->value); - } - for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) - live_set_rem(b, i->def[0]); - - bb_live_set_print(ctx->pc, b); - - return 0; -} - -static void collect_live_values(struct nv_basic_block *b, const int n) -{ - int i; - - if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { - if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { - for (i = 0; i < n; ++i) - b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; - } else { - memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); - } - } else - if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { - memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); - } else { - memset(b->live_set, 0, n * sizeof(uint32_t)); - } -} - -/* NOTE: the live intervals of phi functions start at the first non-phi insn. 
*/ -static int -pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ - struct nv_instruction *i, *i_stop; - int j, s; - const int n = (ctx->pc->num_values + 31) / 32; - - /* verify that first block does not have live-in values */ - if (b->num_in == 0) - for (j = 0; j < n; ++j) - assert(b->live_set[j] == 0); - - collect_live_values(b, n); - - /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */ - for (j = 0; j < 2; ++j) { - if (!b->out[j] || !b->out[j]->phi) - continue; - for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) { - live_set_rem(b, i->def[0]); - - for (s = 0; s < 4; ++s) { - if (!i->src[s]) - break; - assert(i->src[s]->value->insn); - if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j])) - live_set_add(b, i->src[s]->value); - else - live_set_rem(b, i->src[s]->value); - } - } - } - - /* remaining live-outs are live until the end */ - if (b->exit) { - for (j = 0; j < ctx->pc->num_values; ++j) { - if (!(b->live_set[j / 32] & (1 << (j % 32)))) - continue; - add_range(&ctx->pc->values[j], b, b->exit->serial + 1); -#ifdef NV50_RA_DEBUG_LIVEI - debug_printf("adding range for live value %i: ", j); - livei_print(&ctx->pc->values[j]); -#endif - - } - } - - i_stop = b->entry ? 
b->entry->prev : NULL; - - /* don't have to include phi functions here (will have 0 live range) */ - for (i = b->exit; i != i_stop; i = i->prev) { - assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial); - for (j = 0; j < 4; ++j) { - if (i->def[j]) - live_set_rem(b, i->def[j]); - } - if (i->flags_def) - live_set_rem(b, i->flags_def); - - for (j = 0; j < 5; ++j) { - if (i->src[j] && !live_set_test(b, i->src[j])) { - live_set_add(b, i->src[j]->value); - add_range(i->src[j]->value, b, i->serial); -#ifdef NV50_RA_DEBUG_LIVEI - debug_printf("adding range for source %i (ends living): ", - i->src[j]->value->n); - livei_print(i->src[j]->value); -#endif - } - } - if (i->flags_src && !live_set_test(b, i->flags_src)) { - live_set_add(b, i->flags_src->value); - add_range(i->flags_src->value, b, i->serial); -#ifdef NV50_RA_DEBUG_LIVEI - debug_printf("adding range for source %i (ends living): ", - i->flags_src->value->n); - livei_print(i->flags_src->value); -#endif - } - } - - b->pass_seq = ctx->pc->pass_seq; - - if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) - pass_build_intervals(ctx, b->out[0]); - - if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) - pass_build_intervals(ctx, b->out[1]); - - return 0; -} - -static INLINE void -nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set) -{ - memset(set, 0, sizeof(*set)); - - set->last[NV_FILE_GPR] = 255; - set->last[NV_FILE_OUT] = 127; - set->last[NV_FILE_FLAGS] = 4; - set->last[NV_FILE_ADDR] = 4; - - set->pc = pc; -} - -static void -insert_ordered_tail(struct nv_value *list, struct nv_value *nval) -{ - struct nv_value *elem; - - for (elem = list->prev; - elem != list && elem->livei->bgn > nval->livei->bgn; - elem = elem->prev); - /* now elem begins before or at the same time as val */ - - nval->prev = elem; - nval->next = elem->next; - elem->next->prev = nval; - elem->next = nval; -} - -static void -collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, - boolean 
assigned_only) -{ - struct nv_value *val; - int k, n; - - make_empty_list(head); - - for (n = 0; n < ctx->num_insns; ++n) { - struct nv_instruction *i = ctx->insns[n]; - - /* for joined values, only the representative will have livei != NULL */ - for (k = 0; k < 4; ++k) { - if (i->def[k] && i->def[k]->livei) - if (!assigned_only || i->def[k]->reg.id >= 0) - insert_ordered_tail(head, i->def[k]); - } - if (i->flags_def && i->flags_def->livei) - if (!assigned_only || i->flags_def->reg.id >= 0) - insert_ordered_tail(head, i->flags_def); - } - - for (val = head->next; val != head->prev; val = val->next) { - assert(val->join == val); - assert(val->livei->bgn <= val->next->livei->bgn); - } -} - -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) -{ - struct register_set f, free; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - - nv50_ctor_register_set(ctx->pc, &free); - - collect_register_values(ctx, &unhandled, FALSE); - - foreach_s(cur, tmp[0], &unhandled) { - remove_from_list(cur); - - foreach_s(val, tmp[1], &active) { - if (livei_end(val) <= cur->livei->bgn) { - reg_release(&free, val); - move_to_head(&handled, val); - } else - if (!livei_contains(val, cur->livei->bgn)) { - reg_release(&free, val); - move_to_head(&inactive, val); - } - } - - foreach_s(val, tmp[1], &inactive) { - if (livei_end(val) <= cur->livei->bgn) - move_to_head(&handled, val); - else - if (livei_contains(val, cur->livei->bgn)) { - reg_occupy(&free, val); - move_to_head(&active, val); - } - } - - f = free; - - foreach(val, &inactive) - if (livei_have_overlap(val, cur)) - reg_occupy(&f, val); - - foreach(val, &unhandled) - if (val->reg.id >= 0 && livei_have_overlap(val, cur)) - reg_occupy(&f, val); - - if (cur->reg.id < 0) { - boolean mem = !reg_assign(&f, &cur, 1); - - if (mem) { - NOUVEAU_ERR("out of registers\n"); - abort(); - } - } - 
insert_at_head(&active, cur); - reg_occupy(&free, cur); - } - - return 0; -} - -/* Allocate values defined by instructions such as TEX, which have to be - * assigned to consecutive registers. - * Linear scan doesn't really work here since the values can have different - * live intervals. - */ -static int -pass_allocate_constrained_values(struct nv_pc_pass *ctx) -{ - struct nv_value regvals, *val; - struct nv_instruction *i; - struct nv_value *defs[4]; - struct register_set regs[4]; - int n, vsize, c; - uint32_t mask; - boolean mem; - - collect_register_values(ctx, ®vals, TRUE); - - for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; - vsize = nvi_vector_size(i); - if (!(vsize > 1)) - continue; - assert(vsize <= 4); - for (c = 0; c < vsize; ++c) - defs[c] = i->def[c]->join; - - if (defs[0]->reg.id >= 0) { - for (c = 1; c < vsize; ++c) - assert(defs[c]->reg.id >= 0); - continue; - } - - /* Compute registers available for this "vector" of consecutive registers. - * Each value (component) has its own independent live interval. - */ - for (c = 0; c < vsize; ++c) { - nv50_ctor_register_set(ctx->pc, ®s[c]); - - foreach(val, ®vals) { - if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) - reg_occupy(®s[c], val); - } - /* Only 32 bit GPRs will be allocated here, but register set - * granularity for GPRs is 16 bit. 
- */ - mask = 0x03030303; - if (vsize == 2) /* granularity is 2 and not 4 */ - mask |= 0x03030303 << 4; - mask_register_set(®s[c], 0, mask << (c * 2)); - - if (defs[c]->livei) - insert_ordered_tail(®vals, defs[c]); - } - for (c = 1; c < vsize; ++c) - intersect_register_sets(®s[0], ®s[0], ®s[c]); - - mem = !reg_assign(®s[0], &defs[0], vsize); - - if (mem) { - NOUVEAU_ERR("out of registers\n"); - abort(); - } - } - return 0; -} - -static int -nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) -{ - struct nv_pc_pass *ctx; - int i, ret; - - NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n"); - - ctx = CALLOC_STRUCT(nv_pc_pass); - if (!ctx) - return -1; - ctx->pc = pc; - - ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *)); - if (!ctx->insns) { - FREE(ctx); - return -1; - } - - pc->pass_seq++; - ret = pass_generate_phi_movs(ctx, root); - assert(!ret); - - for (i = 0; i < pc->loop_nesting_bound; ++i) { - pc->pass_seq++; - ret = pass_build_live_sets(ctx, root); - assert(!ret && "live sets"); - if (ret) { - NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i); - goto out; - } - } - - pc->pass_seq++; - nv_pc_pass_in_order(root, pass_order_instructions, ctx); - - pc->pass_seq++; - ret = pass_build_intervals(ctx, root); - assert(!ret && "build intervals"); - if (ret) { - NOUVEAU_ERR("failed to build live intervals\n"); - goto out; - } - -#ifdef NV50_RA_DEBUG_LIVEI - for (i = 0; i < pc->num_values; ++i) - livei_print(&pc->values[i]); -#endif - - ret = pass_join_values(ctx, JOIN_MASK_PHI); - if (ret) - goto out; - ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX); - if (ret) - goto out; - ret = pass_join_values(ctx, JOIN_MASK_MOV); - if (ret) - goto out; - ret = pass_allocate_constrained_values(ctx); - if (ret) - goto out; - ret = pass_linear_scan(ctx, 1); - if (ret) - goto out; - - for (i = 0; i < pc->num_values; ++i) - livei_release(&pc->values[i]); - - NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n"); - -out: 
- FREE(ctx->insns); - FREE(ctx); - return ret; -} - -int -nv_pc_exec_pass1(struct nv_pc *pc) -{ - int i, ret; - - for (i = 0; i < pc->num_subroutines + 1; ++i) - if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i]))) - return ret; - return 0; -} diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c deleted file mode 100644 index 717a9029be6..00000000000 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ /dev/null @@ -1,2101 +0,0 @@ -/* - * Copyright 2010 Christoph Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <unistd.h> - -#include "nv50_context.h" -#include "nv50_pc.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "tgsi/tgsi_dump.h" - -#define BLD_MAX_TEMPS 64 -#define BLD_MAX_ADDRS 4 -#define BLD_MAX_PREDS 4 -#define BLD_MAX_IMMDS 128 - -#define BLD_MAX_COND_NESTING 8 -#define BLD_MAX_LOOP_NESTING 4 -#define BLD_MAX_CALL_NESTING 2 - -/* collects all values assigned to the same TGSI register */ -struct bld_value_stack { - struct nv_value *top; - struct nv_value **body; - unsigned size; - uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */ - uint16_t loop_def; -}; - -static INLINE void -bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val) -{ - assert(!stk->size || (stk->body[stk->size - 1] != val)); - - if (!(stk->size % 8)) { - unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *); - unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *); - stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz); - } - stk->body[stk->size++] = val; -} - -static INLINE boolean -bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val) -{ - unsigned i; - - for (i = stk->size; i > 0; --i) - if (stk->body[i - 1] == val) - break; - if (!i) - return FALSE; - - if (i != stk->size) - stk->body[i - 1] = stk->body[stk->size - 1]; - - --stk->size; /* XXX: old size in REALLOC */ - return TRUE; -} - -static INLINE void -bld_vals_push(struct bld_value_stack *stk) -{ - bld_vals_push_val(stk, stk->top); - stk->top = NULL; -} - -static INLINE void -bld_push_values(struct bld_value_stack *stacks, int n) -{ - int i, c; - - for (i = 0; i < n; ++i) - for (c = 0; c < 4; ++c) - if (stacks[i * 4 + c].top) - bld_vals_push(&stacks[i * 4 + c]); -} - -struct bld_context { - struct nv50_translation_info *ti; - - struct nv_pc *pc; - struct nv_basic_block *b; - - struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING]; - int call_lvl; - - struct nv_basic_block 
*cond_bb[BLD_MAX_COND_NESTING]; - struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING]; - struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING]; - int cond_lvl; - struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING]; - struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING]; - int loop_lvl; - - ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ - - struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */ - struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */ - struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ - struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4]; - - uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; - - struct nv_value *frgcrd[4]; - struct nv_value *sysval[4]; - - /* wipe on new BB */ - struct nv_value *saved_addr[4][2]; - struct nv_value *saved_inputs[128]; - struct nv_value *saved_immd[BLD_MAX_IMMDS]; - uint num_immds; -}; - -static INLINE ubyte -bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk) -{ - if (stk < &bld->avs[0][0]) - return NV_FILE_GPR; - else - if (stk < &bld->pvs[0][0]) - return NV_FILE_ADDR; - else - if (stk < &bld->ovs[0][0]) - return NV_FILE_FLAGS; - else - return NV_FILE_OUT; -} - -static INLINE struct nv_value * -bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c) -{ - stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl; - - return stk[i * 4 + c].top; -} - -static struct nv_value * -bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *); - -/* If a variable is defined in a loop without prior use, we don't need - * a phi in the loop header to account for backwards flow. - * - * However, if this variable is then also used outside the loop, we do - * need a phi after all. But we must not use this phi's def inside the - * loop, so we can eliminate the phi if it is unused later. 
- */ -static INLINE void -bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c, - struct nv_value *val) -{ - const uint16_t m = 1 << bld->loop_lvl; - - stk = &stk[i * 4 + c]; - - if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use))) - bld_loop_phi(bld, stk, val); - - stk->top = val; - stk->loop_def |= 1 << bld->loop_lvl; -} - -static INLINE void -bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl) -{ - int i; - const uint16_t mask = ~(1 << lvl); - - for (i = 0; i < n * 4; ++i) { - stk[i].loop_def &= mask; - stk[i].loop_use &= mask; - } -} - -#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) -#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v)) -#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c) -#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v)) -#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c) -#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v)) - -#define STORE_OUTR(i, c, v) \ - do { \ - bld->ovs[i][c].top = (v); \ - bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \ - } while (0) - -static INLINE void -bld_warn_uninitialized(struct bld_context *bld, int kind, - struct bld_value_stack *stk, struct nv_basic_block *b) -{ -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - long i = (stk - &bld->tvs[0][0]) / 4; - long c = (stk - &bld->tvs[0][0]) & 3; - - if (c == 3) - c = -1; - - debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", - i, (int)('x' + c), kind ? 
"may be" : "is", b->id); -#endif -} - -static INLINE struct nv_value * -bld_def(struct nv_instruction *i, int c, struct nv_value *value) -{ - i->def[c] = value; - value->insn = i; - return value; -} - -static INLINE struct nv_value * -find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b) -{ - int i; - - if (stack->top && stack->top->insn->bb == b) - return stack->top; - - for (i = stack->size - 1; i >= 0; --i) - if (stack->body[i]->insn->bb == b) - return stack->body[i]; - return NULL; -} - -/* fetch value from stack that was defined in the specified basic block, - * or search for first definitions in all of its predecessors - */ -static void -fetch_by_bb(struct bld_value_stack *stack, - struct nv_value **vals, int *n, - struct nv_basic_block *b) -{ - int i; - struct nv_value *val; - - assert(*n < 16); /* MAX_COND_NESTING */ - - val = find_by_bb(stack, b); - if (val) { - for (i = 0; i < *n; ++i) - if (vals[i] == val) - return; - vals[(*n)++] = val; - return; - } - for (i = 0; i < b->num_in; ++i) - if (!IS_WALL_EDGE(b->in_kind[i])) - fetch_by_bb(stack, vals, n, b->in[i]); -} - -static INLINE boolean -nvbb_is_terminated(struct nv_basic_block *bb) -{ - return bb->exit && bb->exit->is_terminator; -} - -static INLINE struct nv_value * -bld_load_imm_u32(struct bld_context *bld, uint32_t u); - -static INLINE struct nv_value * -bld_undef(struct bld_context *bld, ubyte file) -{ - struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF); - - return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32)); -} - -static struct nv_value * -bld_phi(struct bld_context *bld, struct nv_basic_block *b, - struct bld_value_stack *stack) -{ - struct nv_basic_block *in; - struct nv_value *vals[16] = { 0 }; - struct nv_value *val; - struct nv_instruction *phi; - int i, j, n; - - do { - i = n = 0; - fetch_by_bb(stack, vals, &n, b); - - if (!n) { - bld_warn_uninitialized(bld, 0, stack, b); - return NULL; - } - - if (n == 1) { - if (nvbb_dominated_by(b, 
vals[0]->insn->bb)) - break; - - bld_warn_uninitialized(bld, 1, stack, b); - - /* back-tracking to insert missing value of other path */ - in = b; - while (in->in[0]) { - if (in->num_in == 1) { - in = in->in[0]; - } else { - if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) - in = in->in[0]; - else - if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) - in = in->in[1]; - else - in = in->in[0]; - } - } - bld->pc->current_block = in; - - /* should make this a no-op */ - bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file)); - continue; - } - - for (i = 0; i < n; ++i) { - /* if value dominates b, continue to the redefinitions */ - if (nvbb_dominated_by(b, vals[i]->insn->bb)) - continue; - - /* if value dominates any in-block, b should be the dom frontier */ - for (j = 0; j < b->num_in; ++j) - if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb)) - break; - /* otherwise, find the dominance frontier and put the phi there */ - if (j == b->num_in) { - in = nvbb_dom_frontier(vals[i]->insn->bb); - val = bld_phi(bld, in, stack); - bld_vals_push_val(stack, val); - break; - } - } - } while(i < n); - - bld->pc->current_block = b; - - if (n == 1) - return vals[0]; - - phi = new_instruction(bld->pc, NV_OP_PHI); - - bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type)); - for (i = 0; i < n; ++i) - phi->src[i] = new_ref(bld->pc, vals[i]); - - return phi->def[0]; -} - -/* Insert a phi function in the loop header. - * For nested loops, we need to insert phi functions in all the outer - * loop headers if they don't have one yet. 
- * - * @def: redefinition from inside loop, or NULL if to be replaced later - */ -static struct nv_value * -bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack, - struct nv_value *def) -{ - struct nv_instruction *phi; - struct nv_basic_block *bb = bld->pc->current_block; - struct nv_value *val = NULL; - - if (bld->loop_lvl > 1) { - --bld->loop_lvl; - if (!((stack->loop_def | stack->loop_use) & (1 << bld->loop_lvl))) - val = bld_loop_phi(bld, stack, NULL); - ++bld->loop_lvl; - } - - if (!val) - val = bld_phi(bld, bld->pc->current_block, stack); /* old definition */ - if (!val) { - bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; - val = bld_undef(bld, bld_stack_file(bld, stack)); - } - - bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; - - phi = new_instruction(bld->pc, NV_OP_PHI); - - bld_def(phi, 0, new_value_like(bld->pc, val)); - if (!def) - def = phi->def[0]; - - bld_vals_push_val(stack, phi->def[0]); - - phi->target = (struct nv_basic_block *)stack; /* cheat */ - - nv_reference(bld->pc, &phi->src[0], val); - nv_reference(bld->pc, &phi->src[1], def); - - bld->pc->current_block = bb; - - return phi->def[0]; -} - -static INLINE struct nv_value * -bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack) -{ - const uint16_t m = 1 << bld->loop_lvl; - const uint16_t use = stack->loop_use; - - stack->loop_use |= m; - - /* If neither used nor def'd inside the loop, build a phi in foresight, - * so we don't have to replace stuff later on, which requires tracking. 
- */ - if (bld->loop_lvl && !((use | stack->loop_def) & m)) - return bld_loop_phi(bld, stack, NULL); - - return bld_phi(bld, bld->pc->current_block, stack); -} - -static INLINE struct nv_value * -bld_imm_u32(struct bld_context *bld, uint32_t u) -{ - int i; - unsigned n = bld->num_immds; - - for (i = 0; i < n; ++i) - if (bld->saved_immd[i]->reg.imm.u32 == u) - return bld->saved_immd[i]; - assert(n < BLD_MAX_IMMDS); - - bld->num_immds++; - - bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32); - bld->saved_immd[n]->reg.imm.u32 = u; - return bld->saved_immd[n]; -} - -static void -bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *, - struct nv_value *); - -/* Replace the source of the phi in the loop header by the last assignment, - * or eliminate the phi function if there is no assignment inside the loop. - * - * Redundancy situation 1 - (used) but (not redefined) value: - * %3 = phi %0, %3 = %3 is used - * %3 = phi %0, %4 = is new definition - * - * Redundancy situation 2 - (not used) but (redefined) value: - * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE - */ -static void -bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) -{ - struct nv_basic_block *save = bld->pc->current_block; - struct nv_instruction *phi, *next; - struct nv_value *val; - struct bld_value_stack *stk; - int i, s, n; - - for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { - next = phi->next; - - stk = (struct bld_value_stack *)phi->target; - phi->target = NULL; - - /* start with s == 1, src[0] is from outside the loop */ - for (s = 1, n = 0; n < bb->num_in; ++n) { - if (bb->in_kind[n] != CFG_EDGE_BACK) - continue; - - assert(s < 4); - bld->pc->current_block = bb->in[n]; - val = bld_fetch_global(bld, stk); - - for (i = 0; i < 4; ++i) - if (phi->src[i] && phi->src[i]->value == val) - break; - if (i == 4) { - /* skip values we do not want to replace */ - for (; phi->src[s] && phi->src[s]->value != 
phi->def[0]; ++s); - nv_reference(bld->pc, &phi->src[s++], val); - } - } - bld->pc->current_block = save; - - if (phi->src[0]->value == phi->def[0] || - phi->src[0]->value == phi->src[1]->value) - s = 1; - else - if (phi->src[1]->value == phi->def[0]) - s = 0; - else - continue; - - if (s >= 0) { - /* eliminate the phi */ - bld_vals_del_val(stk, phi->def[0]); - - ++bld->pc->pass_seq; - bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); - - nv_nvi_delete(phi); - } - } -} - -static INLINE struct nv_value * -bld_imm_f32(struct bld_context *bld, float f) -{ - return bld_imm_u32(bld, fui(f)); -} - -#define SET_TYPE(v, t) ((v)->reg.type = (v)->reg.as_type = (t)) - -static struct nv_value * -bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0) -{ - struct nv_instruction *insn = new_instruction(bld->pc, opcode); - - nv_reference(bld->pc, &insn->src[0], src0); - - return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type)); -} - -static struct nv_value * -bld_insn_2(struct bld_context *bld, uint opcode, - struct nv_value *src0, struct nv_value *src1) -{ - struct nv_instruction *insn = new_instruction(bld->pc, opcode); - - nv_reference(bld->pc, &insn->src[0], src0); - nv_reference(bld->pc, &insn->src[1], src1); - - return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type)); -} - -static struct nv_value * -bld_insn_3(struct bld_context *bld, uint opcode, - struct nv_value *src0, struct nv_value *src1, - struct nv_value *src2) -{ - struct nv_instruction *insn = new_instruction(bld->pc, opcode); - - nv_reference(bld->pc, &insn->src[0], src0); - nv_reference(bld->pc, &insn->src[1], src1); - nv_reference(bld->pc, &insn->src[2], src2); - - return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.as_type)); -} - -static struct nv_value * -bld_duplicate_insn(struct bld_context *bld, struct nv_instruction *nvi) -{ - struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode); - int c; - - if 
(nvi->def[0]) - bld_def(dupi, 0, new_value_like(bld->pc, nvi->def[0])); - - if (nvi->flags_def) { - dupi->flags_def = new_value_like(bld->pc, nvi->flags_def); - dupi->flags_def->insn = dupi; - } - - for (c = 0; c < 5; ++c) - if (nvi->src[c]) - nv_reference(bld->pc, &dupi->src[c], nvi->src[c]->value); - if (nvi->flags_src) - nv_reference(bld->pc, &dupi->flags_src, nvi->flags_src->value); - - dupi->cc = nvi->cc; - dupi->saturate = nvi->saturate; - dupi->centroid = nvi->centroid; - dupi->flat = nvi->flat; - - return dupi->def[0]; -} - -static void -bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, - struct nv_value *val) -{ - struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_STA); - struct nv_value *loc; - - loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32); - - loc->reg.id = ofst * 4; - - nv_reference(bld->pc, &insn->src[0], loc); - nv_reference(bld->pc, &insn->src[1], val); - nv_reference(bld->pc, &insn->src[4], ptr); -} - -static struct nv_value * -bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) -{ - struct nv_value *loc, *val; - - loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32); - - loc->reg.id = ofst * 4; - - val = bld_insn_1(bld, NV_OP_LDA, loc); - - nv_reference(bld->pc, &val->insn->src[4], ptr); - - return val; -} - -#define BLD_INSN_1_EX(d, op, dt, s0, s0t) \ - do { \ - (d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \ - SET_TYPE(d, NV_TYPE_##dt); \ - (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \ - } while(0) - -#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t) \ - do { \ - (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \ - SET_TYPE(d, NV_TYPE_##dt); \ - (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \ - (d)->insn->src[1]->typecast = NV_TYPE_##s1t; \ - } while(0) - -static struct nv_value * -bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) -{ - struct nv_value *val; - - BLD_INSN_1_EX(val, LG2, F32, x, F32); - BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32); - val = 
bld_insn_1(bld, NV_OP_PREEX2, val); - val = bld_insn_1(bld, NV_OP_EX2, val); - - return val; -} - -static INLINE struct nv_value * -bld_load_imm_f32(struct bld_context *bld, float f) -{ - struct nv_value *imm = bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); - - SET_TYPE(imm, NV_TYPE_F32); - return imm; -} - -static INLINE struct nv_value * -bld_load_imm_u32(struct bld_context *bld, uint32_t u) -{ - return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u)); -} - -static struct nv_value * -bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect) -{ - int i; - struct nv_instruction *nvi; - struct nv_value *val; - - for (i = 0; i < 4; ++i) { - if (!bld->saved_addr[i][0]) - break; - if (bld->saved_addr[i][1] == indirect) { - nvi = bld->saved_addr[i][0]->insn; - if (nvi->src[0]->value->reg.imm.u32 == id) - return bld->saved_addr[i][0]; - } - } - i &= 3; - - val = bld_imm_u32(bld, id); - if (indirect) - val = bld_insn_2(bld, NV_OP_ADD, indirect, val); - else - val = bld_insn_1(bld, NV_OP_MOV, val); - - bld->saved_addr[i][0] = val; - bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR; - bld->saved_addr[i][0]->reg.type = NV_TYPE_U16; - bld->saved_addr[i][1] = indirect; - return bld->saved_addr[i][0]; -} - - -static struct nv_value * -bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only) -{ - struct nv_instruction *s0i, *nvi = src->insn; - - if (!nvi) { - nvi = bld_insn_1(bld, - (src->reg.file == NV_FILE_IMM) ? 
NV_OP_MOV : NV_OP_LDA, - src)->insn; - src = nvi->def[0]; - } else - if (bool_only) { - while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_NEG || - nvi->opcode == NV_OP_CVT) { - s0i = nvi->src[0]->value->insn; - if (!s0i || !nv50_op_can_write_flags(s0i->opcode)) - break; - nvi = s0i; - assert(!nvi->flags_src); - } - } - - if (!nv50_op_can_write_flags(nvi->opcode) || - nvi->bb != bld->pc->current_block) { - nvi = new_instruction(bld->pc, NV_OP_CVT); - nv_reference(bld->pc, &nvi->src[0], src); - } - - if (!nvi->flags_def) { - nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16); - nvi->flags_def->insn = nvi; - } - return nvi->flags_def; -} - -static void -bld_kil(struct bld_context *bld, struct nv_value *src) -{ - struct nv_instruction *nvi; - - src = bld_predicate(bld, src, FALSE); - nvi = new_instruction(bld->pc, NV_OP_KIL); - nvi->fixed = 1; - nvi->flags_src = new_ref(bld->pc, src); - nvi->cc = NV_CC_LT; -} - -static void -bld_flow(struct bld_context *bld, uint opcode, ubyte cc, - struct nv_value *src, struct nv_basic_block *target, - boolean plan_reconverge) -{ - struct nv_instruction *nvi; - - if (plan_reconverge) - new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1; - - nvi = new_instruction(bld->pc, opcode); - nvi->is_terminator = 1; - nvi->cc = cc; - nvi->target = target; - if (src) - nvi->flags_src = new_ref(bld->pc, src); -} - -static ubyte -translate_setcc(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_SLT: return NV_CC_LT; - case TGSI_OPCODE_SGE: return NV_CC_GE; - case TGSI_OPCODE_SEQ: return NV_CC_EQ; - case TGSI_OPCODE_SGT: return NV_CC_GT; - case TGSI_OPCODE_SLE: return NV_CC_LE; - case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U; - case TGSI_OPCODE_STR: return NV_CC_TR; - case TGSI_OPCODE_SFL: return NV_CC_FL; - - case TGSI_OPCODE_ISLT: return NV_CC_LT; - case TGSI_OPCODE_ISGE: return NV_CC_GE; - case TGSI_OPCODE_USEQ: return NV_CC_EQ; - case TGSI_OPCODE_USGE: return NV_CC_GE; - case TGSI_OPCODE_USLT: return NV_CC_LT; - case 
TGSI_OPCODE_USNE: return NV_CC_NE; - default: - assert(0); - return NV_CC_FL; - } -} - -static uint -translate_opcode(uint opcode) -{ - switch (opcode) { - case TGSI_OPCODE_ABS: return NV_OP_ABS; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_UADD: return NV_OP_ADD; - case TGSI_OPCODE_AND: return NV_OP_AND; - case TGSI_OPCODE_EX2: return NV_OP_EX2; - case TGSI_OPCODE_CEIL: return NV_OP_CEIL; - case TGSI_OPCODE_FLR: return NV_OP_FLOOR; - case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC; - case TGSI_OPCODE_ROUND: return NV_OP_ROUND; - case TGSI_OPCODE_COS: return NV_OP_COS; - case TGSI_OPCODE_SIN: return NV_OP_SIN; - case TGSI_OPCODE_DDX: return NV_OP_DFDX; - case TGSI_OPCODE_DDY: return NV_OP_DFDY; - case TGSI_OPCODE_F2I: - case TGSI_OPCODE_F2U: - case TGSI_OPCODE_I2F: - case TGSI_OPCODE_U2F: return NV_OP_CVT; - case TGSI_OPCODE_INEG: return NV_OP_NEG; - case TGSI_OPCODE_LG2: return NV_OP_LG2; - case TGSI_OPCODE_ISHR: - case TGSI_OPCODE_USHR: return NV_OP_SHR; - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_UMAD: return NV_OP_MAD; - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_UMAX: return NV_OP_MAX; - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_UMIN: return NV_OP_MIN; - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_UMUL: return NV_OP_MUL; - case TGSI_OPCODE_OR: return NV_OP_OR; - case TGSI_OPCODE_RCP: return NV_OP_RCP; - case TGSI_OPCODE_RSQ: return NV_OP_RSQ; - case TGSI_OPCODE_SAD: return NV_OP_SAD; - case TGSI_OPCODE_SHL: return NV_OP_SHL; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: return NV_OP_SET; - case TGSI_OPCODE_TEX: return NV_OP_TEX; - case TGSI_OPCODE_TXP: return NV_OP_TEX; - case TGSI_OPCODE_TXB: return NV_OP_TXB; - case TGSI_OPCODE_TXL: return NV_OP_TXL; - 
case TGSI_OPCODE_TXD: return NV_OP_TEX; - case TGSI_OPCODE_XOR: return NV_OP_XOR; - default: - return NV_OP_NOP; - } -} - -static ubyte -infer_src_type(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_AND: - case TGSI_OPCODE_OR: - case TGSI_OPCODE_XOR: - case TGSI_OPCODE_SAD: - case TGSI_OPCODE_U2F: - case TGSI_OPCODE_UADD: - case TGSI_OPCODE_UDIV: - case TGSI_OPCODE_UMOD: - case TGSI_OPCODE_UMAD: - case TGSI_OPCODE_UMUL: - case TGSI_OPCODE_UMAX: - case TGSI_OPCODE_UMIN: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - case TGSI_OPCODE_USHR: - return NV_TYPE_U32; - case TGSI_OPCODE_I2F: - case TGSI_OPCODE_IDIV: - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_INEG: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_ISHR: - case TGSI_OPCODE_ISLT: - return NV_TYPE_S32; - default: - return NV_TYPE_F32; - } -} - -static ubyte -infer_dst_type(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_F2U: - case TGSI_OPCODE_AND: - case TGSI_OPCODE_OR: - case TGSI_OPCODE_XOR: - case TGSI_OPCODE_SAD: - case TGSI_OPCODE_UADD: - case TGSI_OPCODE_UDIV: - case TGSI_OPCODE_UMOD: - case TGSI_OPCODE_UMAD: - case TGSI_OPCODE_UMUL: - case TGSI_OPCODE_UMAX: - case TGSI_OPCODE_UMIN: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - case TGSI_OPCODE_USHR: - return NV_TYPE_U32; - case TGSI_OPCODE_F2I: - case TGSI_OPCODE_IDIV: - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_INEG: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_ISHR: - case TGSI_OPCODE_ISLT: - return NV_TYPE_S32; - default: - return NV_TYPE_F32; - } -} - -static void -emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, - unsigned chan, struct nv_value *value) -{ - struct nv_value *ptr; - const struct tgsi_full_dst_register *reg = &inst->Dst[0]; - - if (reg->Register.Indirect) { - ptr = 
FETCH_ADDR(reg->Indirect.Index, - tgsi_util_get_src_register_swizzle(®->Indirect, 0)); - } else { - ptr = NULL; - } - - assert(chan < 4); - - if (inst->Instruction.Opcode != TGSI_OPCODE_MOV) - value->reg.type = infer_dst_type(inst->Instruction.Opcode); - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - BLD_INSN_1_EX(value, SAT, F32, value, F32); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - value->reg.as_type = NV_TYPE_F32; - value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f)); - value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f)); - break; - } - - switch (reg->Register.File) { - case TGSI_FILE_OUTPUT: - if (!value->insn && (bld->ti->output_file == NV_FILE_OUT)) - value = bld_insn_1(bld, NV_OP_MOV, value); - value = bld_insn_1(bld, NV_OP_MOV, value); - value->reg.file = bld->ti->output_file; - - if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) { - STORE_OUTR(reg->Register.Index, chan, value); - } else { - value->insn->fixed = 1; - value->reg.id = bld->ti->output_map[reg->Register.Index][chan]; - } - break; - case TGSI_FILE_TEMPORARY: - assert(reg->Register.Index < BLD_MAX_TEMPS); - if (!value->insn || (value->insn->bb != bld->pc->current_block)) - value = bld_insn_1(bld, NV_OP_MOV, value); - value->reg.file = NV_FILE_GPR; - - if (bld->ti->store_to_memory) - bld_lmem_store(bld, ptr, reg->Register.Index * 4 + chan, value); - else - STORE_TEMP(reg->Register.Index, chan, value); - break; - case TGSI_FILE_ADDRESS: - assert(reg->Register.Index < BLD_MAX_ADDRS); - value->reg.file = NV_FILE_ADDR; - value->reg.type = NV_TYPE_U16; - STORE_ADDR(reg->Register.Index, chan, value); - break; - } -} - -static INLINE uint32_t -bld_is_output_written(struct bld_context *bld, int i, int c) -{ - if (c < 0) - return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32)); - return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); -} - -static void -bld_export_outputs(struct bld_context *bld) -{ - 
struct nv_value *vals[4]; - struct nv_instruction *nvi; - int i, c, n; - - bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS); - - for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { - if (!bld_is_output_written(bld, i, -1)) - continue; - for (n = 0, c = 0; c < 4; ++c) { - if (!bld_is_output_written(bld, i, c)) - continue; - vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]); - assert(vals[n]); - vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]); - vals[n++]->reg.id = bld->ti->output_map[i][c]; - } - assert(n); - - (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; - - for (c = 0; c < n; ++c) - nvi->src[c] = new_ref(bld->pc, vals[c]); - } -} - -static void -bld_new_block(struct bld_context *bld, struct nv_basic_block *b) -{ - int i; - - bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS); - bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS); - bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS); - bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS); - - bld->pc->current_block = b; - - for (i = 0; i < 4; ++i) - bld->saved_addr[i][0] = NULL; - - for (i = 0; i < 128; ++i) - bld->saved_inputs[i] = NULL; - - bld->out_kind = CFG_EDGE_FORWARD; -} - -static struct nv_value * -bld_saved_input(struct bld_context *bld, unsigned i, unsigned c) -{ - unsigned idx = bld->ti->input_map[i][c]; - - if (bld->ti->p->type != PIPE_SHADER_FRAGMENT) - return NULL; - if (bld->saved_inputs[idx]) - return bld->saved_inputs[idx]; - return NULL; -} - -static struct nv_value * -bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val) -{ - if (val->reg.id == 255) { - /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */ - val = bld_insn_1(bld, NV_OP_LINTERP, val); - val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); - val->insn->src[0]->typecast = NV_TYPE_U32; - val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); - val->insn->src[0]->typecast = NV_TYPE_U32; - } else - if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT)) - val = bld_insn_1(bld, NV_OP_LINTERP, val); - 
else - val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]); - - val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0; - val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0; - return val; -} - -static struct nv_value * -emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, - const unsigned s, const unsigned chan) -{ - const struct tgsi_full_src_register *src = &insn->Src[s]; - struct nv_value *res; - struct nv_value *ptr = NULL; - unsigned idx, swz, dim_idx, ind_idx, ind_swz, sgn; - ubyte type = infer_src_type(insn->Instruction.Opcode); - - idx = src->Register.Index; - swz = tgsi_util_get_full_src_register_swizzle(src, chan); - dim_idx = -1; - ind_idx = -1; - ind_swz = 0; - - if (src->Register.Indirect) { - ind_idx = src->Indirect.Index; - ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0); - - ptr = FETCH_ADDR(ind_idx, ind_swz); - } - if (idx >= (128 / 4) && src->Register.File == TGSI_FILE_CONSTANT) - ptr = bld_get_address(bld, (idx * 16) & ~0x1ff, ptr); - - switch (src->Register.File) { - case TGSI_FILE_CONSTANT: - dim_idx = src->Dimension.Index; - assert(dim_idx < 15); - - res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type); - SET_TYPE(res, type); - res->reg.id = (idx * 4 + swz) & 127; - res = bld_insn_1(bld, NV_OP_LDA, res); - - if (ptr) - res->insn->src[4] = new_ref(bld->pc, ptr); - break; - case TGSI_FILE_IMMEDIATE: - assert(idx < bld->ti->immd32_nr); - res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]); - - switch (bld->ti->immd32_ty[idx]) { - case TGSI_IMM_FLOAT32: SET_TYPE(res, NV_TYPE_F32); break; - case TGSI_IMM_UINT32: SET_TYPE(res, NV_TYPE_U32); break; - case TGSI_IMM_INT32: SET_TYPE(res, NV_TYPE_S32); break; - default: - SET_TYPE(res, type); - break; - } - break; - case TGSI_FILE_INPUT: - res = bld_saved_input(bld, idx, swz); - if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP)) - break; - - res = new_value(bld->pc, bld->ti->input_file, type); - res->reg.id = 
bld->ti->input_map[idx][swz]; - - if (res->reg.file == NV_FILE_MEM_V) { - res = bld_interpolate(bld, bld->ti->interp_mode[idx], res); - } else { - assert(src->Dimension.Dimension == 0); - res = bld_insn_1(bld, NV_OP_LDA, res); - assert(res->reg.type == type); - } - bld->saved_inputs[bld->ti->input_map[idx][swz]] = res; - break; - case TGSI_FILE_TEMPORARY: - if (bld->ti->store_to_memory) - res = bld_lmem_load(bld, ptr, idx * 4 + swz); - else - res = bld_fetch_global(bld, &bld->tvs[idx][swz]); - break; - case TGSI_FILE_ADDRESS: - res = bld_fetch_global(bld, &bld->avs[idx][swz]); - break; - case TGSI_FILE_PREDICATE: - res = bld_fetch_global(bld, &bld->pvs[idx][swz]); - break; - case TGSI_FILE_SYSTEM_VALUE: - res = new_value(bld->pc, bld->ti->input_file, NV_TYPE_U32); - res->reg.id = bld->ti->sysval_map[idx]; - res = bld_insn_1(bld, NV_OP_LDA, res); - res = bld_insn_1(bld, NV_OP_CVT, res); - res->reg.type = NV_TYPE_F32; - break; - default: - NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); - abort(); - break; - } - if (!res) - return bld_undef(bld, NV_FILE_GPR); - - sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); - - if (insn->Instruction.Opcode != TGSI_OPCODE_MOV) - res->reg.as_type = type; - else - if (sgn != TGSI_UTIL_SIGN_KEEP) /* apparently "MOV A, -B" assumes float */ - res->reg.as_type = NV_TYPE_F32; - - switch (sgn) { - case TGSI_UTIL_SIGN_KEEP: - break; - case TGSI_UTIL_SIGN_CLEAR: - res = bld_insn_1(bld, NV_OP_ABS, res); - break; - case TGSI_UTIL_SIGN_TOGGLE: - res = bld_insn_1(bld, NV_OP_NEG, res); - break; - case TGSI_UTIL_SIGN_SET: - res = bld_insn_1(bld, NV_OP_ABS, res); - res = bld_insn_1(bld, NV_OP_NEG, res); - break; - default: - NOUVEAU_ERR("illegal/unhandled src reg sign mode\n"); - abort(); - break; - } - - return res; -} - -static void -bld_lit(struct bld_context *bld, struct nv_value *dst0[4], - const struct tgsi_full_instruction *insn) -{ - struct nv_value *val0 = NULL; - struct nv_value *zero = NULL; - 
unsigned mask = insn->Dst[0].Register.WriteMask; - - if (mask & ((1 << 0) | (1 << 3))) - dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f); - - if (mask & (3 << 1)) { - zero = bld_load_imm_f32(bld, 0.0f); - val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero); - - if (mask & (1 << 1)) - dst0[1] = val0; - } - - if (mask & (1 << 2)) { - struct nv_value *val1, *val3, *src1, *src3; - struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f); - struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f); - - src1 = emit_fetch(bld, insn, 0, 1); - src3 = emit_fetch(bld, insn, 0, 3); - - val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16); - val0->insn->flags_def->insn = val0->insn; - - val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero); - val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128); - val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128); - val3 = bld_pow(bld, val1, val3); - - dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero); - dst0[2]->insn->cc = NV_CC_LE; - dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def); - - dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]); - } -} - -static INLINE void -get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg) -{ - switch (insn->Texture.Texture) { - case TGSI_TEXTURE_1D: - *arg = *dim = 1; - break; - case TGSI_TEXTURE_SHADOW1D: - *dim = 1; - *arg = 2; - break; - case TGSI_TEXTURE_UNKNOWN: - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - *arg = *dim = 2; - break; - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - *dim = 2; - *arg = 3; - break; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - *dim = *arg = 3; - break; - default: - assert(0); - break; - } -} - -static void -load_proj_tex_coords(struct bld_context *bld, - struct nv_value *t[4], int dim, int arg, - const struct tgsi_full_instruction *insn) -{ - int c, mask; - - mask = (1 << dim) - 1; - if (arg != dim) - mask |= 4; /* depth comparison value */ - - t[3] = emit_fetch(bld, insn, 0, 3); - - if 
(t[3]->insn->opcode == NV_OP_PINTERP) { - t[3] = bld_duplicate_insn(bld, t[3]->insn); - t[3]->insn->opcode = NV_OP_LINTERP; - nv_reference(bld->pc, &t[3]->insn->src[1], NULL); - } - - t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); - - for (c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) - continue; - t[c] = emit_fetch(bld, insn, 0, c); - - if (t[c]->insn->opcode != NV_OP_LINTERP && - t[c]->insn->opcode != NV_OP_PINTERP) - continue; - t[c] = bld_duplicate_insn(bld, t[c]->insn); - t[c]->insn->opcode = NV_OP_PINTERP; - nv_reference(bld->pc, &t[c]->insn->src[1], t[3]); - - mask &= ~(1 << c); - } - - for (c = 0; mask; ++c, mask >>= 1) { - if (!(mask & 1)) - continue; - t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]); - } -} - -/* For a quad of threads / top left, top right, bottom left, bottom right - * pixels, do a different operation, and take src0 from a specific thread. - */ -#define QOP_ADD 0 -#define QOP_SUBR 1 -#define QOP_SUB 2 -#define QOP_MOV1 3 - -#define QOP(a, b, c, d) \ - ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6)) - -static INLINE struct nv_value * -bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane, - struct nv_value *src1, boolean wp) -{ - struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1); - val->insn->lanes = lane; - val->insn->quadop = qop; - if (wp) { - val->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16); - val->insn->flags_def->insn = val->insn; - } - return val; -} - -static INLINE struct nv_value * -bld_cmov(struct bld_context *bld, - struct nv_value *src, ubyte cc, struct nv_value *cr) -{ - src = bld_insn_1(bld, NV_OP_MOV, src); - - src->insn->cc = cc; - src->insn->flags_src = new_ref(bld->pc, cr); - - return src; -} - -static struct nv_instruction * -emit_tex(struct bld_context *bld, uint opcode, - struct nv_value *dst[4], struct nv_value *t_in[4], - int argc, int tic, int tsc, int cube) -{ - struct nv_value *t[4]; - struct nv_instruction *nvi; - int c; - - /* the inputs 
to a tex instruction must be separate values */ - for (c = 0; c < argc; ++c) { - t[c] = bld_insn_1(bld, NV_OP_MOV, t_in[c]); - SET_TYPE(t[c], NV_TYPE_F32); - t[c]->insn->fixed = 1; - } - - nvi = new_instruction(bld->pc, opcode); - - for (c = 0; c < 4; ++c) - dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32)); - - for (c = 0; c < argc; ++c) - nvi->src[c] = new_ref(bld->pc, t[c]); - - nvi->tex_t = tic; - nvi->tex_s = tsc; - nvi->tex_mask = 0xf; - nvi->tex_cube = cube; - nvi->tex_live = 0; - nvi->tex_argc = argc; - - return nvi; -} - -static void -bld_texlod_sequence(struct bld_context *bld, - struct nv_value *dst[4], struct nv_value *t[4], int arg, - int tic, int tsc, int cube) -{ - emit_tex(bld, NV_OP_TXL, dst, t, arg, tic, tsc, cube); /* TODO */ -} - - -/* The lanes of a quad are grouped by the bit in the condition register - * they have set, which is selected by differing bias values. - * Move the input values for TEX into a new register set for each group - * and execute TEX only for a specific group. - * We always need to use 4 new registers for the inputs/outputs because - * the implicitly calculated derivatives must be correct. 
- */ -static void -bld_texbias_sequence(struct bld_context *bld, - struct nv_value *dst[4], struct nv_value *t[4], int arg, - int tic, int tsc, int cube) -{ - struct nv_instruction *sel, *tex; - struct nv_value *bit[4], *cr[4], *res[4][4], *val; - int l, c; - - const ubyte cc[4] = { NV_CC_EQ, NV_CC_S, NV_CC_C, NV_CC_O }; - - for (l = 0; l < 4; ++l) { - bit[l] = bld_load_imm_u32(bld, 1 << l); - - val = bld_quadop(bld, QOP(SUBR, SUBR, SUBR, SUBR), - t[arg - 1], l, t[arg - 1], TRUE); - - cr[l] = bld_cmov(bld, bit[l], NV_CC_EQ, val->insn->flags_def); - - cr[l]->reg.file = NV_FILE_FLAGS; - SET_TYPE(cr[l], NV_TYPE_U16); - } - - sel = new_instruction(bld->pc, NV_OP_SELECT); - - for (l = 0; l < 4; ++l) - sel->src[l] = new_ref(bld->pc, cr[l]); - - bld_def(sel, 0, new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16)); - - for (l = 0; l < 4; ++l) { - tex = emit_tex(bld, NV_OP_TXB, dst, t, arg, tic, tsc, cube); - - tex->cc = cc[l]; - tex->flags_src = new_ref(bld->pc, sel->def[0]); - - for (c = 0; c < 4; ++c) - res[l][c] = tex->def[c]; - } - - for (l = 0; l < 4; ++l) - for (c = 0; c < 4; ++c) - res[l][c] = bld_cmov(bld, res[l][c], cc[l], sel->def[0]); - - for (c = 0; c < 4; ++c) { - sel = new_instruction(bld->pc, NV_OP_SELECT); - - for (l = 0; l < 4; ++l) - sel->src[l] = new_ref(bld->pc, res[l][c]); - - bld_def(sel, 0, (dst[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32))); - } -} - -static boolean -bld_is_constant(struct nv_value *val) -{ - if (val->reg.file == NV_FILE_IMM) - return TRUE; - return val->insn && nvcg_find_constant(val->insn->src[0]); -} - -static void -bld_tex(struct bld_context *bld, struct nv_value *dst0[4], - const struct tgsi_full_instruction *insn) -{ - struct nv_value *t[4], *s[3]; - uint opcode = translate_opcode(insn->Instruction.Opcode); - int arg, dim, c; - const int tic = insn->Src[1].Register.Index; - const int tsc = tic; - const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 
1 : 0; - - get_tex_dim(insn, &dim, &arg); - - if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) - load_proj_tex_coords(bld, t, dim, arg, insn); - else { - for (c = 0; c < dim; ++c) - t[c] = emit_fetch(bld, insn, 0, c); - if (arg != dim) - t[dim] = emit_fetch(bld, insn, 0, 2); - } - - if (cube) { - assert(dim >= 3); - for (c = 0; c < 3; ++c) - s[c] = bld_insn_1(bld, NV_OP_ABS, t[c]); - - s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[1]); - s[0] = bld_insn_2(bld, NV_OP_MAX, s[0], s[2]); - s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]); - - for (c = 0; c < 3; ++c) - t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], s[0]); - } - - if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) { - t[arg++] = emit_fetch(bld, insn, 0, 3); - - if ((bld->ti->p->type == PIPE_SHADER_FRAGMENT) && - !bld_is_constant(t[arg - 1])) { - if (opcode == NV_OP_TXB) - bld_texbias_sequence(bld, dst0, t, arg, tic, tsc, cube); - else - bld_texlod_sequence(bld, dst0, t, arg, tic, tsc, cube); - return; - } - } - - emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube); -} - -static INLINE struct nv_value * -bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn, - int n) -{ - struct nv_value *dotp, *src0, *src1; - int c; - - src0 = emit_fetch(bld, insn, 0, 0); - src1 = emit_fetch(bld, insn, 1, 0); - dotp = bld_insn_2(bld, NV_OP_MUL, src0, src1); - - for (c = 1; c < n; ++c) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dotp = bld_insn_3(bld, NV_OP_MAD, src0, src1, dotp); - } - return dotp; -} - -#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \ - for (chan = 0; chan < 4; ++chan) \ - if ((inst)->Dst[0].Register.WriteMask & (1 << chan)) - -static void -bld_instruction(struct bld_context *bld, - const struct tgsi_full_instruction *insn) -{ - struct nv50_program *prog = bld->ti->p; - const struct tgsi_full_dst_register *dreg = &insn->Dst[0]; - struct nv_value *src0; - struct nv_value *src1; - struct nv_value *src2; - struct nv_value *dst0[4] = { 0 }; - struct nv_value 
*temp; - int c; - uint opcode = translate_opcode(insn->Instruction.Opcode); - -#if NV50_DEBUG & NV50_DEBUG_PROG_IR - debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); -#endif - - switch (insn->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MUL: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dst0[c] = bld_insn_2(bld, opcode, src0, src1); - } - break; - case TGSI_OPCODE_ARL: - src1 = bld_imm_u32(bld, 4); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - temp = bld_insn_1(bld, NV_OP_FLOOR, src0); - SET_TYPE(temp, NV_TYPE_S32); - dst0[c] = bld_insn_2(bld, NV_OP_SHL, temp, src1); - } - break; - case TGSI_OPCODE_CMP: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - src2 = emit_fetch(bld, insn, 2, c); - src0 = bld_predicate(bld, src0, FALSE); - - src1 = bld_insn_1(bld, NV_OP_MOV, src1); - src1->insn->flags_src = new_ref(bld->pc, src0); - src1->insn->cc = NV_CC_LT; - - src2 = bld_insn_1(bld, NV_OP_MOV, src2); - src2->insn->flags_src = new_ref(bld->pc, src0); - src2->insn->cc = NV_CC_GE; - - dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2); - } - break; - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_PRESIN, src0); - if (insn->Dst[0].Register.WriteMask & 7) - temp = bld_insn_1(bld, opcode, temp); - for (c = 0; c < 3; ++c) - if (insn->Dst[0].Register.WriteMask & (1 << c)) - dst0[c] = temp; - if (!(insn->Dst[0].Register.WriteMask & (1 << 3))) - break; - src0 = emit_fetch(bld, insn, 0, 3); - temp = bld_insn_1(bld, NV_OP_PRESIN, src0); - dst0[3] = bld_insn_1(bld, opcode, temp); - break; - case TGSI_OPCODE_DP2: - temp = bld_dot(bld, insn, 2); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DP3: - temp = 
bld_dot(bld, insn, 3); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DP4: - temp = bld_dot(bld, insn, 4); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DPH: - src0 = bld_dot(bld, insn, 3); - src1 = emit_fetch(bld, insn, 1, 3); - temp = bld_insn_2(bld, NV_OP_ADD, src0, src1); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_DST: - if (insn->Dst[0].Register.WriteMask & 1) - dst0[0] = bld_imm_f32(bld, 1.0f); - if (insn->Dst[0].Register.WriteMask & 2) { - src0 = emit_fetch(bld, insn, 0, 1); - src1 = emit_fetch(bld, insn, 1, 1); - dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, src1); - } - if (insn->Dst[0].Register.WriteMask & 4) - dst0[2] = emit_fetch(bld, insn, 0, 2); - if (insn->Dst[0].Register.WriteMask & 8) - dst0[3] = emit_fetch(bld, insn, 1, 3); - break; - case TGSI_OPCODE_EXP: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_FLOOR, src0); - - if (insn->Dst[0].Register.WriteMask & 2) - dst0[1] = bld_insn_2(bld, NV_OP_SUB, src0, temp); - if (insn->Dst[0].Register.WriteMask & 1) { - temp = bld_insn_1(bld, NV_OP_PREEX2, temp); - dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); - } - if (insn->Dst[0].Register.WriteMask & 4) { - temp = bld_insn_1(bld, NV_OP_PREEX2, src0); - dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); - } - if (insn->Dst[0].Register.WriteMask & 8) - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - case TGSI_OPCODE_EX2: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_PREEX2, src0); - temp = bld_insn_1(bld, NV_OP_EX2, temp); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_FRC: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0); - dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]); - } - break; - case TGSI_OPCODE_KIL: - for (c = 0; c < 4; ++c) { - src0 = emit_fetch(bld, insn, 0, c); - 
bld_kil(bld, src0); - } - break; - case TGSI_OPCODE_KILP: - (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1; - break; - case TGSI_OPCODE_IF: - { - struct nv_basic_block *b = new_basic_block(bld->pc); - - assert(bld->cond_lvl < BLD_MAX_COND_NESTING); - - nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD); - - bld->join_bb[bld->cond_lvl] = bld->pc->current_block; - bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - - src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0), TRUE); - - bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, (bld->cond_lvl == 0)); - - ++bld->cond_lvl; - bld_new_block(bld, b); - } - break; - case TGSI_OPCODE_ELSE: - { - struct nv_basic_block *b = new_basic_block(bld->pc); - - --bld->cond_lvl; - nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); - - bld->cond_bb[bld->cond_lvl]->exit->target = b; - bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - - new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1; - - ++bld->cond_lvl; - bld_new_block(bld, b); - } - break; - case TGSI_OPCODE_ENDIF: - { - struct nv_basic_block *b = new_basic_block(bld->pc); - - if (!nvbb_is_terminated(bld->pc->current_block)) - bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE); - - --bld->cond_lvl; - nvbb_attach_block(bld->pc->current_block, b, bld->out_kind); - nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); - - bld->cond_bb[bld->cond_lvl]->exit->target = b; - - bld_new_block(bld, b); - - if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) { - bld->join_bb[bld->cond_lvl]->exit->prev->target = b; - new_instruction(bld->pc, NV_OP_JOIN)->is_join = TRUE; - } - } - break; - case TGSI_OPCODE_BGNLOOP: - { - struct nv_basic_block *bl = new_basic_block(bld->pc); - struct nv_basic_block *bb = new_basic_block(bld->pc); - - assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); - - bld->loop_bb[bld->loop_lvl] = bl; - bld->brkt_bb[bld->loop_lvl] = bb; - - bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE); - - 
nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER); - - bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]); - - if (bld->loop_lvl == bld->pc->loop_nesting_bound) - bld->pc->loop_nesting_bound++; - - bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl); - bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl); - bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl); - } - break; - case TGSI_OPCODE_BRK: - { - struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; - - bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE); - - if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ - nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); - - bld->out_kind = CFG_EDGE_FAKE; - } - break; - case TGSI_OPCODE_CONT: - { - struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - - bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); - - nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); - - if ((bb = bld->join_bb[bld->cond_lvl - 1])) { - bld->join_bb[bld->cond_lvl - 1] = NULL; - nv_nvi_delete(bb->exit->prev); - } - bld->out_kind = CFG_EDGE_FAKE; - } - break; - case TGSI_OPCODE_ENDLOOP: - { - struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - - if (!nvbb_is_terminated(bld->pc->current_block)) - bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); - - nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); - - bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ - - bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]); - } - break; - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_CEIL: - case TGSI_OPCODE_FLR: - case TGSI_OPCODE_TRUNC: - case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_DDX: - case TGSI_OPCODE_DDY: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - dst0[c] = bld_insn_1(bld, opcode, src0); - } - break; - case TGSI_OPCODE_LIT: - bld_lit(bld, dst0, insn); - break; - case TGSI_OPCODE_LRP: - 
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2); - dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2); - } - break; - case TGSI_OPCODE_MOV: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = emit_fetch(bld, insn, 0, c); - break; - case TGSI_OPCODE_MAD: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2); - } - break; - case TGSI_OPCODE_POW: - src0 = emit_fetch(bld, insn, 0, 0); - src1 = emit_fetch(bld, insn, 1, 0); - temp = bld_pow(bld, src0, src1); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_LOG: - src0 = emit_fetch(bld, insn, 0, 0); - src0 = bld_insn_1(bld, NV_OP_ABS, src0); - temp = bld_insn_1(bld, NV_OP_LG2, src0); - dst0[2] = temp; - if (insn->Dst[0].Register.WriteMask & 3) { - temp = bld_insn_1(bld, NV_OP_FLOOR, temp); - dst0[0] = temp; - } - if (insn->Dst[0].Register.WriteMask & 2) { - temp = bld_insn_1(bld, NV_OP_PREEX2, temp); - temp = bld_insn_1(bld, NV_OP_EX2, temp); - temp = bld_insn_1(bld, NV_OP_RCP, temp); - dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, temp); - } - if (insn->Dst[0].Register.WriteMask & 8) - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_LG2: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, opcode, src0); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_RSQ: - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_ABS, src0); - temp = bld_insn_1(bld, NV_OP_RSQ, temp); - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - dst0[c] = temp; - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case 
TGSI_OPCODE_SNE: - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_USNE: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1); - dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode); - SET_TYPE(dst0[c], infer_dst_type(insn->Instruction.Opcode)); - - dst0[c]->insn->src[0]->typecast = - dst0[c]->insn->src[1]->typecast = - infer_src_type(insn->Instruction.Opcode); - - if (dst0[c]->reg.type != NV_TYPE_F32) - break; - dst0[c]->reg.as_type = NV_TYPE_S32; - dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]); - dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]); - SET_TYPE(dst0[c], NV_TYPE_F32); - } - break; - case TGSI_OPCODE_SCS: - if (insn->Dst[0].Register.WriteMask & 0x3) { - src0 = emit_fetch(bld, insn, 0, 0); - temp = bld_insn_1(bld, NV_OP_PRESIN, src0); - if (insn->Dst[0].Register.WriteMask & 0x1) - dst0[0] = bld_insn_1(bld, NV_OP_COS, temp); - if (insn->Dst[0].Register.WriteMask & 0x2) - dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp); - } - if (insn->Dst[0].Register.WriteMask & 0x4) - dst0[2] = bld_imm_f32(bld, 0.0f); - if (insn->Dst[0].Register.WriteMask & 0x8) - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - case TGSI_OPCODE_SSG: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = bld_predicate(bld, src0, FALSE); - temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); - temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); - dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp); - dst0[c]->insn->cc = NV_CC_EQ; - nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1); - dst0[c] = bld_insn_2(bld, NV_OP_SELECT, dst0[c], temp); - } - break; - case TGSI_OPCODE_SUB: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - src0 = emit_fetch(bld, insn, 0, c); - src1 = emit_fetch(bld, insn, 1, c); - dst0[c] = 
bld_insn_2(bld, NV_OP_ADD, src0, src1); - dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG; - } - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXP: - case TGSI_OPCODE_TXD: // fake - bld_tex(bld, dst0, insn); - break; - case TGSI_OPCODE_XPD: - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { - if (c == 3) { - dst0[3] = bld_imm_f32(bld, 1.0f); - break; - } - src0 = emit_fetch(bld, insn, 1, (c + 1) % 3); - src1 = emit_fetch(bld, insn, 0, (c + 2) % 3); - dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1); - - src0 = emit_fetch(bld, insn, 0, (c + 1) % 3); - src1 = emit_fetch(bld, insn, 1, (c + 2) % 3); - dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]); - - dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; - } - break; - case TGSI_OPCODE_RET: - (new_instruction(bld->pc, NV_OP_RET))->fixed = 1; - break; - case TGSI_OPCODE_END: - if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) - bld_export_outputs(bld); - break; - default: - NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); - abort(); - break; - } - - FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) - emit_store(bld, insn, c, dst0[c]); - - if (prog->type == PIPE_SHADER_VERTEX && prog->vp.clpd_nr && - dreg->Register.File == TGSI_FILE_OUTPUT && !dreg->Register.Indirect && - prog->out[dreg->Register.Index].sn == TGSI_SEMANTIC_POSITION) { - - int p; - for (p = 0; p < prog->vp.clpd_nr; p++) { - struct nv_value *clipd = NULL; - - for (c = 0; c < 4; c++) { - temp = new_value(bld->pc, NV_FILE_MEM_C(15), NV_TYPE_F32); - temp->reg.id = p * 4 + c; - temp = bld_insn_1(bld, NV_OP_LDA, temp); - - clipd = clipd ? 
-            bld_insn_3(bld, NV_OP_MAD, dst0[c], temp, clipd) :
-            bld_insn_2(bld, NV_OP_MUL, dst0[c], temp);
-         }
-
-         temp = bld_insn_1(bld, NV_OP_MOV, clipd);
-         temp->reg.file = NV_FILE_OUT;
-         temp->reg.id = bld->ti->p->vp.clpd + p;
-         temp->insn->fixed = 1;
-      }
-   }
-}
-/* Free the `body` pointer of each of the n * 4 trackers that start at `base`
- * (entry i * 4 + c for i in [0, n) and c in [0, 4)).
- *
- * Entries whose body is NULL are skipped. The pointers are not reset after
- * FREE, and the tracker array itself is not released here.
- */
-static INLINE void
-bld_free_value_trackers(struct bld_value_stack *base, int n)
-{
-   int i, c;
-
-   for (i = 0; i < n; ++i)
-      for (c = 0; c < 4; ++c)
-         if (base[i * 4 + c].body)
-            FREE(base[i * 4 + c].body);
-}
-/* Populate `pc` from the translation info `ti`.
- *
- * Allocates a temporary bld_context, installs a fresh basic block as both
- * pc->root[0] and pc->current_block, passes ti->insns[0 .. inst_nr) to
- * bld_instruction in order, then frees the tracker bodies and the context.
- *
- * Returns 0 on every path visible in this function.
- *
- * NOTE(review): `bld` is dereferenced right after CALLOC_STRUCT without a
- * NULL check.
- */
-int
-nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
-{
-   struct bld_context *bld = CALLOC_STRUCT(bld_context);
-   int c;
-   unsigned ip;
-
-   pc->root[0] = pc->current_block = new_basic_block(pc);
-
-   bld->pc = pc;
-   bld->ti = ti;
-
-   pc->loop_nesting_bound = 1;
-   /* c = util_bitcount of (fp.interp >> 24). For fragment programs with
-    * c != 0, frgcrd[3] becomes RCP(LINTERP(v)), where v is the NV_FILE_MEM_V
-    * value with id c - 1.
-    * NOTE(review): presumably the reciprocal used for perspective-correct
-    * interpolation; confirm against the users of frgcrd[3].
-    */
-   c = util_bitcount(bld->ti->p->fp.interp >> 24);
-   if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
-      bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
-      bld->frgcrd[3]->reg.id = c - 1;
-      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
-      bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
-   }
-   /* hand every instruction to bld_instruction, in program order */
-   for (ip = 0; ip < ti->inst_nr; ++ip)
-      bld_instruction(bld, &ti->insns[ip]);
-   /* release the tracker bodies of the tvs, avs, pvs and ovs tables */
-   bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
-   bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
-   bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);
-
-   bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
-
-   FREE(bld);
-   return 0;
-}
-
-/* If a variable is assigned in a loop, replace all references to the value
- * from outside the loop with a phi value.
- *
- * Starting at block b, every instruction from b->phi (or from b->entry when
- * b->phi is NULL) to the end of the `next` chain has its src[0..4] and
- * flags_src references to old_val re-pointed at new_val via nv_reference().
- * b is then stamped with pc->pass_seq, and the walk recurses into out[0] and
- * out[1] when their pass_seq is still below pc->pass_seq, so a block already
- * stamped in this pass is not entered again by the recursion.
- *
- * NOTE(review): assumes the caller advanced pc->pass_seq before the first
- * call; confirm at the call site.
- */
-static void
-bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
-                  struct nv_value *old_val,
-                  struct nv_value *new_val)
-{
-   struct nv_instruction *nvi;
-
-   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
-      int s;
-      for (s = 0; s < 5; ++s) { /* five source slots; NULL slots are skipped */
-         if (!nvi->src[s])
-            continue;
-         if (nvi->src[s]->value == old_val)
-            nv_reference(pc, &nvi->src[s], new_val);
-      }
-      if (nvi->flags_src && nvi->flags_src->value == old_val)
-         nv_reference(pc, &nvi->flags_src, new_val);
-   }
-   /* stamp b before following its out edges, so the recursion skips it */
-   b->pass_seq = pc->pass_seq;
-
-   if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
-      bld_replace_value(pc, b->out[0], old_val, new_val);
-
-   if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
-      bld_replace_value(pc, b->out[1], old_val, new_val);
-} |