diff options
Diffstat (limited to 'src/gallium/drivers/nv50/nv50_pc.c')
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 804 |
1 files changed, 804 insertions, 0 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c new file mode 100644 index 00000000000..bb464ec4c9f --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -0,0 +1,804 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* #define NV50PC_DEBUG */ + +#include "nv50_pc.h" +#include "nv50_program.h" + +#include <stdio.h> + +/* returns TRUE if operands 0 and 1 can be swapped */ +boolean +nv_op_commutative(uint opcode) +{ + switch (opcode) { + case NV_OP_ADD: + case NV_OP_MUL: + case NV_OP_MAD: + case NV_OP_AND: + case NV_OP_OR: + case NV_OP_XOR: + case NV_OP_MIN: + case NV_OP_MAX: + case NV_OP_SAD: + return TRUE; + default: + return FALSE; + } +} + +/* return operand to which the address register applies */ +int +nv50_indirect_opnd(struct nv_instruction *i) +{ + if (!i->src[4]) + return -1; + + switch (i->opcode) { + case NV_OP_MOV: + case NV_OP_LDA: + case NV_OP_STA: + return 0; + default: + return 1; + } +} + +boolean +nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s) +{ + if (nvi->flags_src || nvi->flags_def) + return FALSE; + + switch (nvi->opcode) { + case NV_OP_ADD: + case NV_OP_MUL: + case NV_OP_AND: + case NV_OP_OR: + case NV_OP_XOR: + case NV_OP_SHL: + case NV_OP_SHR: + return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) && + (nvi->def[0]->reg.file == NV_FILE_GPR); + case NV_OP_MOV: + assert(s == 0); + return (nvi->def[0]->reg.file == NV_FILE_GPR); + default: + return FALSE; + } +} + +boolean +nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value) +{ + int i; + + for (i = 0; i < 3 && nvi->src[i]; ++i) + if (nvi->src[i]->value->reg.file == NV_FILE_IMM) + return FALSE; + + switch (nvi->opcode) { + case NV_OP_ABS: + case NV_OP_ADD: + case NV_OP_CEIL: + case NV_OP_FLOOR: + case NV_OP_TRUNC: + case NV_OP_CVT: + case NV_OP_NEG: + case NV_OP_MAD: + case NV_OP_MUL: + case NV_OP_SAT: + case NV_OP_SUB: + case NV_OP_MAX: + case NV_OP_MIN: + if (s == 0 && (value->reg.file == NV_FILE_MEM_S || + value->reg.file == NV_FILE_MEM_P)) + return TRUE; + if (value->reg.file < NV_FILE_MEM_C(0) || + value->reg.file > NV_FILE_MEM_C(15)) + return FALSE; + return (s == 1) || + ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR)); + case NV_OP_MOV: + assert(s == 0); + return /* TRUE */ FALSE; /* don't turn MOVs into loads */ + default: + return FALSE; + } +} + +/* Return whether this instruction can be executed conditionally. */ +boolean +nv50_nvi_can_predicate(struct nv_instruction *nvi) +{ + int i; + + if (nvi->flags_src) + return FALSE; + for (i = 0; i < 4 && nvi->src[i]; ++i) + if (nvi->src[i]->value->reg.file == NV_FILE_IMM) + return FALSE; + return TRUE; +} + +ubyte +nv50_supported_src_mods(uint opcode, int s) +{ + switch (opcode) { + case NV_OP_ABS: + return NV_MOD_NEG | NV_MOD_ABS; /* obviously */ + case NV_OP_ADD: + case NV_OP_MUL: + case NV_OP_MAD: + return NV_MOD_NEG; + case NV_OP_DFDX: + case NV_OP_DFDY: + assert(s == 0); + return NV_MOD_NEG; + case NV_OP_MAX: + case NV_OP_MIN: + return NV_MOD_ABS; + case NV_OP_CVT: + case NV_OP_LG2: + case NV_OP_NEG: + case NV_OP_PREEX2: + case NV_OP_PRESIN: + case NV_OP_RCP: + case NV_OP_RSQ: + return NV_MOD_ABS | NV_MOD_NEG; + default: + return 0; + } +} + +/* We may want an opcode table. */ +boolean +nv50_op_can_write_flags(uint opcode) +{ + if (nv_is_vector_op(opcode)) + return FALSE; + switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */ + case NV_OP_PHI: + case NV_OP_MOV: + case NV_OP_LINTERP: + case NV_OP_PINTERP: + case NV_OP_LDA: + return FALSE; + default: + break; + } + if (opcode >= NV_OP_RCP && opcode <= NV_OP_PREEX2) + return FALSE; + return TRUE; +} + +int +nv_nvi_refcount(struct nv_instruction *nvi) +{ + int i, rc; + + rc = nvi->flags_def ? nvi->flags_def->refc : 0; + + for (i = 0; i < 4; ++i) { + if (!nvi->def[i]) + return rc; + rc += nvi->def[i]->refc; + } + return rc; +} + +int +nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val, + struct nv_value *new_val) +{ + int i, n; + + if (old_val == new_val) + return old_val->refc; + + for (i = 0, n = 0; i < pc->num_refs; ++i) { + if (pc->refs[i]->value == old_val) { + ++n; + nv_reference(pc, &pc->refs[i], new_val); + } + } + return n; +} + +struct nv_value * +nvcg_find_constant(struct nv_ref *ref) +{ + struct nv_value *src; + + if (!ref) + return NULL; + + src = ref->value; + while (src->insn && src->insn->opcode == NV_OP_MOV) { + assert(!src->insn->src[0]->mod); + src = src->insn->src[0]->value; + } + if ((src->reg.file == NV_FILE_IMM) || + (src->insn && src->insn->opcode == NV_OP_LDA && + src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && + src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15))) + return src; + return NULL; +} + +struct nv_value * +nvcg_find_immediate(struct nv_ref *ref) +{ + struct nv_value *src = nvcg_find_constant(ref); + + return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; +} + +static void +nv_pc_free_refs(struct nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_refs; i += 64) + FREE(pc->refs[i]); + FREE(pc->refs); +} + +static const char * +edge_name(ubyte type) +{ + switch (type) { + case CFG_EDGE_FORWARD: return "forward"; + case CFG_EDGE_BACK: return "back"; + case CFG_EDGE_LOOP_ENTER: return "loop"; + case CFG_EDGE_LOOP_LEAVE: return "break"; + case CFG_EDGE_FAKE: return "fake"; + default: + return "?"; + } +} + +void +nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) +{ + struct nv_basic_block *bb[64], *bbb[16], *b; + int j, p, pp; + + bb[0] = root; + p = 1; + pp = 0; + + while (p > 0) { + b = bb[--p]; + b->priv = 0; + + for (j = 1; j >= 0; --j) { + if (!b->out[j]) + continue; + + switch (b->out_kind[j]) { + case CFG_EDGE_BACK: + continue; + case CFG_EDGE_FORWARD: + case CFG_EDGE_FAKE: + if (++b->out[j]->priv == b->out[j]->num_in) + bb[p++] = b->out[j]; + break; + case CFG_EDGE_LOOP_ENTER: + bb[p++] = b->out[j]; + break; + case CFG_EDGE_LOOP_LEAVE: + bbb[pp++] = b->out[j]; + break; + default: + assert(0); + break; + } + } + + f(priv, b); + + if (!p) { + p = pp; + for (; pp > 0; --pp) + bb[pp - 1] = bbb[pp - 1]; + } + } +} + +static void +nv_do_print_function(void *priv, struct nv_basic_block *b) +{ + struct nv_instruction *i = b->phi; + + debug_printf("=== BB %i ", b->id); + if (b->out[0]) + debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); + if (b->out[1]) + debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); + debug_printf("===\n"); + + i = b->phi; + if (!i) + i = b->entry; + for (; i; i = i->next) + nv_print_instruction(i); +} + +void +nv_print_function(struct nv_basic_block *root) +{ + if (root->subroutine) + debug_printf("SUBROUTINE %i\n", root->subroutine); + else + debug_printf("MAIN\n"); + + nv_pc_pass_in_order(root, nv_do_print_function, root); +} + +void +nv_print_program(struct nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i]) + nv_print_function(pc->root[i]); +} + +#ifdef NV50PC_DEBUG +static void +nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) +{ + int i; + + b->pass_seq = pc->pass_seq; + + fprintf(f, "\t%i [shape=box]\n", b->id); + + for (i = 0; i < 2; ++i) { + if (!b->out[i]) + continue; + switch (b->out_kind[i]) { + case CFG_EDGE_FORWARD: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_ENTER: + fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_LOOP_LEAVE: + fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id); + break; + case CFG_EDGE_BACK: + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id); + continue; + case CFG_EDGE_FAKE: + fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id); + break; + default: + assert(0); + break; + } + if (b->out[i]->pass_seq < pc->pass_seq) + nv_do_print_cfgraph(pc, f, b->out[i]); + } +} + +/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */ +static void +nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) +{ + FILE *f; + + f = fopen(filepath, "a"); + if (!f) + return; + + fprintf(f, "digraph G {\n"); + + ++pc->pass_seq; + + nv_do_print_cfgraph(pc, f, pc->root[subr]); + + fprintf(f, "}\n"); + + fclose(f); +} +#endif + +static INLINE void +nvcg_show_bincode(struct nv_pc *pc) +{ + unsigned i; + + for (i = 0; i < pc->bin_size / 4; ++i) { + debug_printf("0x%08x ", pc->emit[i]); + if ((i % 16) == 15) + debug_printf("\n"); + } + debug_printf("\n"); +} + +static int +nv50_emit_program(struct nv_pc *pc) +{ + uint32_t *code = pc->emit; + int n; + + NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size); + + for (n = 0; n < pc->num_blocks; ++n) { + struct nv_instruction *i; + struct nv_basic_block *b = pc->bb_list[n]; + + for (i = b->entry; i; i = i->next) { + nv50_emit_instruction(pc, i); + + pc->bin_pos += 1 + (pc->emit[0] & 1); + pc->emit += 1 + (pc->emit[0] & 1); + } + } + assert(pc->emit == &code[pc->bin_size / 4]); + + /* XXX: we can do better than this ... */ + if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) { + pc->emit[0] = 0xf0000001; + pc->emit[1] = 0xe0000000; + pc->bin_size += 8; + } + + pc->emit = code; + code[pc->bin_size / 4 - 1] |= 1; + +#ifdef NV50PC_DEBUG + nvcg_show_bincode(pc); +#endif + + return 0; +} + +int +nv50_generate_code(struct nv50_translation_info *ti) +{ + struct nv_pc *pc; + int ret; + int i; + + pc = CALLOC_STRUCT(nv_pc); + if (!pc) + return 1; + + pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0])); + if (!pc->root) { + FREE(pc); + return 1; + } + pc->num_subroutines = ti->subr_nr; + + ret = nv50_tgsi_to_nc(pc, ti); + if (ret) + goto out; +#ifdef NV50PC_DEBUG + nv_print_program(pc); +#endif + + pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE; + + /* optimization */ + ret = nv_pc_exec_pass0(pc); + if (ret) + goto out; +#ifdef NV50PC_DEBUG + nv_print_program(pc); +#endif + + /* register allocation */ + ret = nv_pc_exec_pass1(pc); + if (ret) + goto out; +#ifdef NV50PC_DEBUG + nv_print_program(pc); + nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0); +#endif + + /* prepare for emission */ + ret = nv_pc_exec_pass2(pc); + if (ret) + goto out; + + pc->emit = CALLOC(pc->bin_size / 4 + 2, 4); + if (!pc->emit) { + ret = 3; + goto out; + } + ret = nv50_emit_program(pc); + if (ret) + goto out; + + ti->p->code_size = pc->bin_size; + ti->p->code = pc->emit; + + ti->p->immd_size = pc->immd_count * 4; + ti->p->immd = pc->immd_buf; + + /* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */ + ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1); + + ti->p->fixups = pc->fixups; + ti->p->num_fixups = pc->num_fixups; + + NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); + +out: + nv_pc_free_refs(pc); + + for (i = 0; i < pc->num_blocks; ++i) + FREE(pc->bb_list[i]); + if (pc->root) + FREE(pc->root); + if (ret) { /* on success, these will be referenced by nv50_program */ + if (pc->emit) + FREE(pc->emit); + if (pc->immd_buf) + FREE(pc->immd_buf); + if (pc->fixups) + FREE(pc->fixups); + } + FREE(pc); + return ret; +} + +static void +nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i) +{ + if (!b->phi) { + i->prev = NULL; + b->phi = i; + i->next = b->entry; + if (b->entry) { + assert(!b->entry->prev && b->exit); + b->entry->prev = i; + } else { + b->entry = i; + b->exit = i; + } + } else { + assert(b->entry); + if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */ + assert(b->entry == b->exit); + b->entry->next = i; + i->prev = b->entry; + b->entry = i; + b->exit = i; + } else { /* insert before entry */ + assert(b->entry->prev && b->exit); + i->next = b->entry; + i->prev = b->entry->prev; + b->entry->prev = i; + i->prev->next = i; + } + } +} + +void +nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i) +{ + if (i->opcode == NV_OP_PHI) { + nvbb_insert_phi(b, i); + } else { + i->prev = b->exit; + if (b->exit) + b->exit->next = i; + b->exit = i; + if (!b->entry) + b->entry = i; + else + if (i->prev && i->prev->opcode == NV_OP_PHI) + b->entry = i; + } + + i->bb = b; + b->num_instructions++; +} + +void +nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni) +{ + if (!at->next) { + nvbb_insert_tail(at->bb, ni); + return; + } + ni->next = at->next; + ni->prev = at; + ni->next->prev = ni; + ni->prev->next = ni; +} + +void +nv_nvi_delete(struct nv_instruction *nvi) +{ + struct nv_basic_block *b = nvi->bb; + int j; + + /* debug_printf("REM: "); nv_print_instruction(nvi); */ + + for (j = 0; j < 5; ++j) + nv_reference(NULL, &nvi->src[j], NULL); + nv_reference(NULL, &nvi->flags_src, NULL); + + if (nvi->next) + nvi->next->prev = nvi->prev; + else { + assert(nvi == b->exit); + b->exit = nvi->prev; + } + + if (nvi->prev) + nvi->prev->next = nvi->next; + + if (nvi == b->entry) { + /* PHIs don't get hooked to b->entry */ + b->entry = nvi->next; + assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI); + } + + if (nvi == b->phi) { + if (nvi->opcode != NV_OP_PHI) + NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n"); + + assert(!nvi->prev); + if (!nvi->next || nvi->next->opcode != NV_OP_PHI) + b->phi = NULL; + else + b->phi = nvi->next; + } +} + +void +nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2) +{ + struct nv_basic_block *b = i1->bb; + + assert(i1->opcode != NV_OP_PHI && + i2->opcode != NV_OP_PHI); + assert(i1->next == i2); + + if (b->exit == i2) + b->exit = i1; + + if (b->entry == i1) + b->entry = i2; + + i2->prev = i1->prev; + i1->next = i2->next; + i2->next = i1; + i1->prev = i2; + + if (i2->prev) + i2->prev->next = i2; + if (i1->next) + i1->next->prev = i1; +} + +void +nvbb_attach_block(struct nv_basic_block *parent, + struct nv_basic_block *b, ubyte edge_kind) +{ + assert(b->num_in < 8); + + if (parent->out[0]) { + assert(!parent->out[1]); + parent->out[1] = b; + parent->out_kind[1] = edge_kind; + } else { + parent->out[0] = b; + parent->out_kind[0] = edge_kind; + } + + b->in[b->num_in] = parent; + b->in_kind[b->num_in++] = edge_kind; +} + +/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */ + +boolean +nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) +{ + int j; + + if (b == d) + return TRUE; + + for (j = 0; j < b->num_in; ++j) + if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d)) + return FALSE; + + return j ? TRUE : FALSE; +} + +/* check if @bf (future) can be reached from @bp (past), stop at @bt */ +boolean +nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, + struct nv_basic_block *bt) +{ + struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b; + int i, p, n; + + p = 0; + n = 1; + q[0] = bp; + + while (p < n) { + b = q[p++]; + + if (b == bf) + break; + if (b == bt) + continue; + assert(n <= (1024 - 2)); + + for (i = 0; i < 2; ++i) { + if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) { + q[n] = b->out[i]; + q[n++]->priv = 1; + } + } + } + for (--n; n >= 0; --n) + q[n]->priv = 0; + + return (b == bf); +} + +static struct nv_basic_block * +nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df) +{ + struct nv_basic_block *out; + int i; + + if (!nvbb_dominated_by(df, b)) { + for (i = 0; i < df->num_in; ++i) { + if (df->in_kind[i] == CFG_EDGE_BACK) + continue; + if (nvbb_dominated_by(df->in[i], b)) + return df; + } + } + for (i = 0; i < 2 && df->out[i]; ++i) { + if (df->out_kind[i] == CFG_EDGE_BACK) + continue; + if ((out = nvbb_find_dom_frontier(b, df->out[i]))) + return out; + } + return NULL; +} + +struct nv_basic_block * +nvbb_dom_frontier(struct nv_basic_block *b) +{ + struct nv_basic_block *df; + int i; + + for (i = 0; i < 2 && b->out[i]; ++i) + if ((df = nvbb_find_dom_frontier(b, b->out[i]))) + return df; + return NULL; +} |