/* * Copyright 2013 Vadim Girlin * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Vadim Girlin */ #define FBC_DEBUG 0 #if FBC_DEBUG #define FBC_DUMP(q) do { q } while (0) #else #define FBC_DUMP(q) #endif #include #include "sb_bc.h" #include "sb_shader.h" #include "sb_pass.h" namespace r600_sb { using std::cerr; int bc_finalizer::run() { regions_vec &rv = sh.get_regions(); for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E; ++I) { region_node *r = *I; assert(r); bool loop = r->is_loop(); if (loop) finalize_loop(r); else finalize_if(r); r->expand(); } run_on(sh.root); cf_peephole(); // workaround for some problems on r6xx/7xx // add ALU NOP to each vertex shader if (!ctx.is_egcm() && sh.target == TARGET_VS) { cf_node *c = sh.create_clause(NST_ALU_CLAUSE); alu_group_node *g = sh.create_alu_group(); alu_node *a = sh.create_alu(); a->bc.set_op(ALU_OP0_NOP); a->bc.last = 1; g->push_back(a); c->push_back(g); sh.root->push_back(c); c = sh.create_cf(CF_OP_NOP); sh.root->push_back(c); last_cf = c; } if (last_cf->bc.op_ptr->flags & CF_ALU) { last_cf = sh.create_cf(CF_OP_NOP); sh.root->push_back(last_cf); } if (ctx.is_cayman()) last_cf->insert_after(sh.create_cf(CF_OP_CF_END)); else last_cf->bc.end_of_program = 1; for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) { cf_node *le = last_export[t]; if (le) le->bc.set_op(CF_OP_EXPORT_DONE); } sh.ngpr = ngpr; sh.nstack = nstack; return 0; } void bc_finalizer::finalize_loop(region_node* r) { cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10); cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END); loop_start->jump_after(loop_end); loop_end->jump_after(loop_start); for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); I != E; ++I) { depart_node *dep = *I; cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK); loop_break->jump(loop_end); dep->push_back(loop_break); dep->expand(); } // FIXME produces unnecessary LOOP_CONTINUE for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end(); I != E; ++I) { repeat_node *rep = *I; if (!(rep->parent == r && rep->prev == NULL)) { cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE); loop_cont->jump(loop_end); rep->push_back(loop_cont); } rep->expand(); } r->push_front(loop_start); r->push_back(loop_end); } void bc_finalizer::finalize_if(region_node* r) { update_nstack(r); // expecting the following control flow structure here: // - region // { // - depart/repeat 1 (it may be depart/repeat for some outer region) // { // - if // { // - depart/repeat 2 (possibly for outer region) // { // - some optional code // } // } // - optional code> ... // } // } container_node *repdep1 = static_cast(r->first); assert(repdep1->is_depart() || repdep1->is_repeat()); if_node *n_if = static_cast(repdep1->first); if (n_if) { assert(n_if->is_if()); container_node *repdep2 = static_cast(n_if->first); assert(repdep2->is_depart() || repdep2->is_repeat()); cf_node *if_jump = sh.create_cf(CF_OP_JUMP); cf_node *if_pop = sh.create_cf(CF_OP_POP); if_pop->bc.pop_count = 1; if_pop->jump_after(if_pop); r->push_front(if_jump); r->push_back(if_pop); bool has_else = n_if->next; if (has_else) { cf_node *nelse = sh.create_cf(CF_OP_ELSE); n_if->insert_after(nelse); if_jump->jump(nelse); nelse->jump_after(if_pop); nelse->bc.pop_count = 1; } else { if_jump->jump_after(if_pop); if_jump->bc.pop_count = 1; } n_if->expand(); } for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); I != E; ++I) { (*I)->expand(); } r->departs.clear(); assert(r->repeats.empty()); } void bc_finalizer::run_on(container_node* c) { for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->is_alu_group()) { finalize_alu_group(static_cast(n)); } else { if (n->is_fetch_inst()) { finalize_fetch(static_cast(n)); } else if (n->is_cf_inst()) { finalize_cf(static_cast(n)); } else if (n->is_alu_clause()) { } else if (n->is_fetch_clause()) { } else { assert(!"unexpected node"); } if (n->is_container()) run_on(static_cast(n)); } } } void bc_finalizer::finalize_alu_group(alu_group_node* g) { alu_node *last = NULL; for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { alu_node *n = static_cast(*I); unsigned slot = n->bc.slot; value *d = n->dst.empty() ? NULL : n->dst[0]; if (d && d->is_special_reg()) { assert(n->bc.op_ptr->flags & AF_MOVA); d = NULL; } sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0); if (d) { assert(fdst.chan() == slot || slot == SLOT_TRANS); } n->bc.dst_gpr = fdst.sel(); n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0; if (d && d->is_rel() && d->rel && !d->rel->is_const()) { n->bc.dst_rel = 1; update_ngpr(d->array->gpr.sel() + d->array->array_size -1); } else { n->bc.dst_rel = 0; } n->bc.write_mask = d != NULL; n->bc.last = 0; if (n->bc.op_ptr->flags & AF_PRED) { n->bc.update_pred = (n->dst[1] != NULL); n->bc.update_exec_mask = (n->dst[2] != NULL); } // FIXME handle predication here n->bc.pred_sel = PRED_SEL_OFF; update_ngpr(n->bc.dst_gpr); finalize_alu_src(g, n); last = n; } last->bc.last = 1; } void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { vvec &sv = a->src; FBC_DUMP( cerr << "finalize_alu_src: "; dump::dump_op(a); cerr << "\n"; ); unsigned si = 0; for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) { value *v = *I; assert(v); bc_alu_src &src = a->bc.src[si]; sel_chan sc; src.rel = 0; sel_chan gpr; switch (v->kind) { case VLK_REL_REG: sc = v->get_final_gpr(); src.sel = sc.sel(); src.chan = sc.chan(); if (!v->rel->is_const()) { src.rel = 1; update_ngpr(v->array->gpr.sel() + v->array->array_size -1); } else src.rel = 0; break; case VLK_REG: gpr = v->get_final_gpr(); src.sel = gpr.sel(); src.chan = gpr.chan(); update_ngpr(src.sel); break; case VLK_TEMP: src.sel = v->gpr.sel(); src.chan = v->gpr.chan(); update_ngpr(src.sel); break; case VLK_UNDEF: case VLK_CONST: { literal lv = v->literal_value; src.chan = 0; if (lv == literal(0)) src.sel = ALU_SRC_0; else if (lv == literal(0.5f)) src.sel = ALU_SRC_0_5; else if (lv == literal(1.0f)) src.sel = ALU_SRC_1; else if (lv == literal(1)) src.sel = ALU_SRC_1_INT; else if (lv == literal(-1)) src.sel = ALU_SRC_M_1_INT; else { src.sel = ALU_SRC_LITERAL; src.chan = g->literal_chan(lv); src.value = lv; } break; } case VLK_KCACHE: { cf_node *clause = static_cast(g->parent); assert(clause->is_alu_clause()); sel_chan k = translate_kcache(clause, v); assert(k && "kcache translation failed"); src.sel = k.sel(); src.chan = k.chan(); break; } case VLK_PARAM: case VLK_SPECIAL_CONST: src.sel = v->select.sel(); src.chan = v->select.chan(); break; default: assert(!"unknown value kind"); break; } } while (si < 3) { a->bc.src[si++].sel = 0; } } void bc_finalizer::emit_set_grad(fetch_node* f) { assert(f->src.size() == 12); unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H }; unsigned arg_start = 0; for (unsigned op = 0; op < 2; ++op) { fetch_node *n = sh.create_fetch(); n->bc.set_op(ops[op]); // FIXME extract this loop into a separate method and reuse it int reg = -1; arg_start += 4; for (unsigned chan = 0; chan < 4; ++chan) { n->bc.dst_sel[chan] = SEL_MASK; unsigned sel = SEL_MASK; value *v = f->src[arg_start + chan]; if (!v || v->is_undef()) { sel = SEL_MASK; } else if (v->is_const()) { literal l = v->literal_value; if (l == literal(0)) sel = SEL_0; else if (l == literal(1.0f)) sel = SEL_1; else { cerr << "invalid fetch constant operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } } else if (v->is_any_gpr()) { unsigned vreg = v->gpr.sel(); unsigned vchan = v->gpr.chan(); if (reg == -1) reg = vreg; else if ((unsigned)reg != vreg) { cerr << "invalid fetch source operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } sel = vchan; } else { cerr << "invalid fetch source operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } n->bc.src_sel[chan] = sel; } if (reg >= 0) update_ngpr(reg); n->bc.src_gpr = reg >= 0 ? reg : 0; f->insert_before(n); } } void bc_finalizer::finalize_fetch(fetch_node* f) { int reg = -1; // src unsigned src_count = 4; unsigned flags = f->bc.op_ptr->flags; if (flags & FF_VTX) { src_count = 1; } else if (flags & FF_USEGRAD) { emit_set_grad(f); } for (unsigned chan = 0; chan < src_count; ++chan) { unsigned sel = f->bc.src_sel[chan]; if (sel > SEL_W) continue; value *v = f->src[chan]; if (v->is_undef()) { sel = SEL_MASK; } else if (v->is_const()) { literal l = v->literal_value; if (l == literal(0)) sel = SEL_0; else if (l == literal(1.0f)) sel = SEL_1; else { cerr << "invalid fetch constant operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } } else if (v->is_any_gpr()) { unsigned vreg = v->gpr.sel(); unsigned vchan = v->gpr.chan(); if (reg == -1) reg = vreg; else if ((unsigned)reg != vreg) { cerr << "invalid fetch source operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } sel = vchan; } else { cerr << "invalid fetch source operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } f->bc.src_sel[chan] = sel; } if (reg >= 0) update_ngpr(reg); f->bc.src_gpr = reg >= 0 ? reg : 0; // dst reg = -1; unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK}; for (unsigned chan = 0; chan < 4; ++chan) { unsigned sel = f->bc.dst_sel[chan]; if (sel == SEL_MASK) continue; value *v = f->dst[chan]; if (!v) continue; if (v->is_any_gpr()) { unsigned vreg = v->gpr.sel(); unsigned vchan = v->gpr.chan(); if (reg == -1) reg = vreg; else if ((unsigned)reg != vreg) { cerr << "invalid fetch dst operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } dst_swz[vchan] = sel; } else { cerr << "invalid fetch dst operand " << chan << " "; dump::dump_op(f); cerr << "\n"; abort(); } } for (unsigned i = 0; i < 4; ++i) f->bc.dst_sel[i] = dst_swz[i]; assert(reg >= 0); if (reg >= 0) update_ngpr(reg); f->bc.dst_gpr = reg >= 0 ? reg : 0; } void bc_finalizer::finalize_cf(cf_node* c) { unsigned flags = c->bc.op_ptr->flags; if (flags & CF_CALL) { update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2); } c->bc.end_of_program = 0; last_cf = c; if (flags & CF_EXP) { c->bc.set_op(CF_OP_EXPORT); last_export[c->bc.type] = c; int reg = -1; for (unsigned chan = 0; chan < 4; ++chan) { unsigned sel = c->bc.sel[chan]; if (sel > SEL_W) continue; value *v = c->src[chan]; if (v->is_undef()) { sel = SEL_MASK; } else if (v->is_const()) { literal l = v->literal_value; if (l == literal(0)) sel = SEL_0; else if (l == literal(1.0f)) sel = SEL_1; else { cerr << "invalid export constant operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } } else if (v->is_any_gpr()) { unsigned vreg = v->gpr.sel(); unsigned vchan = v->gpr.chan(); if (reg == -1) reg = vreg; else if ((unsigned)reg != vreg) { cerr << "invalid export source operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } sel = vchan; } else { cerr << "invalid export source operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } c->bc.sel[chan] = sel; } if (reg >= 0) update_ngpr(reg); c->bc.rw_gpr = reg >= 0 ? reg : 0; } else if (flags & CF_MEM) { int reg = -1; unsigned mask = 0; for (unsigned chan = 0; chan < 4; ++chan) { value *v = c->src[chan]; if (!v || v->is_undef()) continue; if (!v->is_any_gpr() || v->gpr.chan() != chan) { cerr << "invalid source operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } unsigned vreg = v->gpr.sel(); if (reg == -1) reg = vreg; else if ((unsigned)reg != vreg) { cerr << "invalid source operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } mask |= (1 << chan); } assert(reg >= 0 && mask); if (reg >= 0) update_ngpr(reg); c->bc.rw_gpr = reg >= 0 ? reg : 0; c->bc.comp_mask = mask; if ((flags & CF_RAT) && (c->bc.type & 1)) { reg = -1; for (unsigned chan = 0; chan < 4; ++chan) { value *v = c->src[4 + chan]; if (!v || v->is_undef()) continue; if (!v->is_any_gpr() || v->gpr.chan() != chan) { cerr << "invalid source operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } unsigned vreg = v->gpr.sel(); if (reg == -1) reg = vreg; else if ((unsigned)reg != vreg) { cerr << "invalid source operand " << chan << " "; dump::dump_op(c); cerr << "\n"; abort(); } } assert(reg >= 0); if (reg >= 0) update_ngpr(reg); c->bc.index_gpr = reg >= 0 ? reg : 0; } } else { #if 0 if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) { c->bc.valid_pixel_mode = 1; } #endif } } sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) { unsigned sel = v->select.sel(); unsigned bank = sel >> 12; unsigned chan = v->select.chan(); static const unsigned kc_base[] = {128, 160, 256, 288}; sel &= 4095; unsigned line = sel >> 4; for (unsigned k = 0; k < 4; ++k) { bc_kcache &kc = alu->bc.kc[k]; if (kc.mode == KC_LOCK_NONE) break; if (kc.bank == bank && (kc.addr == line || (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) { sel = kc_base[k] + (sel - (kc.addr << 4)); return sel_chan(sel, chan); } } assert(!"kcache translation error"); return 0; } void bc_finalizer::update_ngpr(unsigned gpr) { if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr) ngpr = gpr + 1; } void bc_finalizer::update_nstack(region_node* r, unsigned add) { unsigned loops = 0; unsigned ifs = 0; while (r) { if (r->is_loop()) ++loops; else ++ifs; r = r->get_parent_region(); } unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add; // FIXME calculate more precisely if (ctx.is_evergreen()) { ++stack_elements; } else { stack_elements += 2; if (ctx.is_cayman()) ++stack_elements; } unsigned stack_entries = (stack_elements + 3) >> 2; if (nstack < stack_entries) nstack = stack_entries; } void bc_finalizer::cf_peephole() { for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; I = N) { N = I; ++N; cf_node *c = static_cast(*I); if (c->jump_after_target) { c->jump_target = static_cast(c->jump_target->next); c->jump_after_target = false; } if (c->is_cf_op(CF_OP_POP)) { node *p = c->prev; if (p->is_alu_clause()) { cf_node *a = static_cast(p); if (a->bc.op == CF_OP_ALU) { a->bc.set_op(CF_OP_ALU_POP_AFTER); c->remove(); } } } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) { // if JUMP is immediately followed by its jump target, // then JUMP is useless and we can eliminate it c->remove(); } } } } // namespace r600_sb