diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc.h | 27 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_builder.cpp | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 224 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_core.cpp | 45 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_shader.cpp | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_shader.h | 3 |
6 files changed, 163 insertions, 144 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h index 9c6ed46d051..9f65098848d 100644 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ b/src/gallium/drivers/r600/sb/sb_bc.h @@ -674,40 +674,39 @@ class bc_parser { typedef std::stack<region_node*> region_stack; region_stack loop_stack; - int enable_dump; - int optimize; - public: - bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader, - int dump_source, int optimize) : + bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : ctx(sctx), dec(), bc(bc), pshader(pshader), dw(), bc_ndw(), max_cf(), sh(), error(), slots(), cgroup(), - cf_map(), loop_stack(), enable_dump(dump_source), - optimize(optimize) { } + cf_map(), loop_stack() { } - int parse(); + int decode(); + int prepare(); shader* get_shader() { assert(!error); return sh; } private: - int parse_shader(); + int decode_shader(); int parse_decls(); - int parse_cf(unsigned &i, bool &eop); + int decode_cf(unsigned &i, bool &eop); - int parse_alu_clause(cf_node *cf); - int parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); + int decode_alu_clause(cf_node *cf); + int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); - int parse_fetch_clause(cf_node *cf); + int decode_fetch_clause(cf_node *cf); int prepare_ir(); + int prepare_alu_clause(cf_node *cf); + int prepare_alu_group(cf_node* cf, alu_group_node *g); + int prepare_fetch_clause(cf_node *cf); + int prepare_loop(cf_node *c); int prepare_if(cf_node *c); - int prepare_alu_clause(cf_node *c); }; diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp index b0c2e41c33f..f40e4694b8d 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp @@ -94,10 +94,6 @@ int bc_builder::build() { cf_pos = bb.get_pos(); } - if (sh.enable_dump) { - bc_dump(sh, cerr, &bb).run(); - } - return 0; } diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 83292874d6b..9f3ecc51cd9 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -47,7 +47,7 @@ namespace r600_sb { using std::cerr; -int bc_parser::parse() { +int bc_parser::decode() { dw = bc->bytecode; bc_ndw = bc->ndw; @@ -71,47 +71,27 @@ int bc_parser::parse() { t = TARGET_FETCH; } - sh = new shader(ctx, t, bc->debug_id, enable_dump); - int r = parse_shader(); + sh = new shader(ctx, t, bc->debug_id); + int r = decode_shader(); delete dec; - if (r) - return r; - sh->ngpr = bc->ngpr; sh->nstack = bc->nstack; - if (sh->target != TARGET_FETCH) { - sh->src_stats.ndw = bc->ndw; - sh->collect_stats(false); - } - - if (enable_dump) { - bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run(); - } - - if (!optimize) - return 0; - - prepare_ir(); - return r; } -int bc_parser::parse_shader() { +int bc_parser::decode_shader() { int r = 0; unsigned i = 0; bool eop = false; sh->init(); - if (pshader) - parse_decls(); - do { eop = false; - if ((r = parse_cf(i, eop))) + if ((r = decode_cf(i, eop))) return r; } while (!eop || (i >> 1) <= max_cf); @@ -119,34 +99,34 @@ int bc_parser::parse_shader() { return 0; } -int bc_parser::parse_decls() { - -// sh->prepare_regs(rs.bc.ngpr); - - if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { +int bc_parser::prepare() { + int r = 0; + if ((r = parse_decls())) + return r; + if ((r = prepare_ir())) + return r; + return 0; +} -#if SB_NO_ARRAY_INFO +int bc_parser::parse_decls() { + if (!pshader) { sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); + return 0; + } -#else + if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { assert(pshader->num_arrays); if (pshader->num_arrays) { - for (unsigned i = 0; i < pshader->num_arrays; ++i) { r600_shader_array &a = pshader->arrays[i]; sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); } - } else { sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); } - - -#endif - } if (sh->target == TARGET_VS) @@ -183,12 +163,10 @@ int bc_parser::parse_decls() { } } - return 0; } - -int bc_parser::parse_cf(unsigned &i, bool &eop) { +int bc_parser::decode_cf(unsigned &i, bool &eop) { int r; @@ -210,18 +188,15 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) { cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; if (flags & CF_ALU) { - if ((r = parse_alu_clause(cf))) + if ((r = decode_alu_clause(cf))) return r; } else if (flags & CF_FETCH) { - if ((r = parse_fetch_clause(cf))) + if ((r = decode_fetch_clause(cf))) return r;; } else if (flags & CF_EXP) { assert(!cf->bc.rw_rel); } else if (flags & (CF_STRM | CF_RAT)) { assert(!cf->bc.rw_rel); - } else if (cf->bc.op == CF_OP_CALL_FS) { - sh->init_call_fs(cf); - cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; } else if (flags & CF_BRANCH) { if (cf->bc.addr > max_cf) max_cf = cf->bc.addr; @@ -232,7 +207,7 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) { return 0; } -int bc_parser::parse_alu_clause(cf_node* cf) { +int bc_parser::decode_alu_clause(cf_node* cf) { unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt; cf->subtype = NST_ALU_CLAUSE; @@ -243,7 +218,7 @@ int bc_parser::parse_alu_clause(cf_node* cf) { unsigned ng = 0; do { - parse_alu_group(cf, i, gcnt); + decode_alu_group(cf, i, gcnt); assert(gcnt <= cnt); cnt -= gcnt; ng++; @@ -252,16 +227,17 @@ int bc_parser::parse_alu_clause(cf_node* cf) { return 0; } -int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { +int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { int r; alu_node *n; alu_group_node *g = sh->create_alu_group(); cgroup = !cgroup; memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); - gcnt = 0; + unsigned literal_mask = 0; + do { n = sh->create_alu(); g->push_back(n); @@ -280,11 +256,62 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { assert(n->bc.last); - unsigned literal_mask = 0; + for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { + n = static_cast<alu_node*>(*I); + + for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { + bc_alu_src &src = n->bc.src[k]; + if (src.sel == ALU_SRC_LITERAL) { + literal_mask |= (1 << src.chan); + src.value.u = dw[i + src.chan]; + } + } + } + + unsigned literal_ndw = 0; + while (literal_mask) { + g->literals.push_back(dw[i + literal_ndw]); + literal_ndw += 1; + literal_mask >>= 1; + } + + literal_ndw = (literal_ndw + 1) & ~1u; + + i += literal_ndw; + gcnt += literal_ndw >> 1; + + cf->push_back(g); + return 0; +} + +int bc_parser::prepare_alu_clause(cf_node* cf) { + + // loop over alu groups + for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { + assert(I->subtype == NST_ALU_GROUP); + alu_group_node *g = static_cast<alu_group_node*>(*I); + prepare_alu_group(cf, g); + } + + return 0; +} + +int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { + + alu_node *n; + + cgroup = !cgroup; + memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { n = static_cast<alu_node*>(*I); + + if (!sh->assign_slot(n, slots[cgroup])) { + assert(!"alu slot assignment failed"); + return -1; + } + unsigned src_count = n->bc.op_ptr->src_count; if (ctx.alu_slots(n->bc.op) & AF_4SLOT) @@ -340,10 +367,6 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { bc_alu_src &src = n->bc.src[s]; if (src.sel == ALU_SRC_LITERAL) { - unsigned chan = src.chan; - - literal_mask |= (1 << chan); - src.value.u = dw[i+chan]; n->src[s] = sh->get_const_value(src.value); } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? @@ -430,38 +453,52 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { if (p) { g->push_front(p); - } - unsigned literal_ndw = 0; - while (literal_mask) { - g->literals.push_back(dw[i + literal_ndw]); - literal_ndw += 1; - literal_mask >>= 1; - } + if (p->count() == 3 && ctx.is_cayman()) { + // cayman's scalar instruction that can use 3 or 4 slots - literal_ndw = (literal_ndw + 1) & ~1u; + // FIXME for simplicity we'll always add 4th slot, + // but probably we might want to always remove 4th slot and make + // sure that regalloc won't choose 'w' component for dst - i += literal_ndw; - gcnt += literal_ndw >> 1; + alu_node *f = static_cast<alu_node*>(p->first); + alu_node *a = sh->create_alu(); + a->src = f->src; + a->dst.resize(f->dst.size()); + a->bc = f->bc; + a->bc.slot = SLOT_W; + p->push_back(a); + } + } - cf->push_back(g); return 0; } -int bc_parser::parse_fetch_clause(cf_node* cf) { +int bc_parser::decode_fetch_clause(cf_node* cf) { int r; unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; cf->subtype = NST_TEX_CLAUSE; - vvec grad_v, grad_h; - while (cnt--) { fetch_node *n = sh->create_fetch(); cf->push_back(n); if ((r = dec->decode_fetch(i, n->bc))) return r; + } + return 0; +} + +int bc_parser::prepare_fetch_clause(cf_node *cf) { + + vvec grad_v, grad_h; + + for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { + + fetch_node *n = static_cast<fetch_node*>(*I); + assert(n->is_valid()); + unsigned flags = n->bc.op_ptr->flags; unsigned vtx = flags & FF_VTX; @@ -527,6 +564,7 @@ int bc_parser::parse_fetch_clause(cf_node* cf) { } } + return 0; } @@ -540,7 +578,14 @@ int bc_parser::prepare_ir() { unsigned flags = c->bc.op_ptr->flags; - if (flags & CF_LOOP_START) { + if (flags & CF_ALU) { + prepare_alu_clause(c); + } else if (flags & CF_FETCH) { + prepare_fetch_clause(c); + } else if (c->bc.op == CF_OP_CALL_FS) { + sh->init_call_fs(c); + c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; + } else if (flags & CF_LOOP_START) { prepare_loop(c); } else if (c->bc.op == CF_OP_JUMP) { prepare_if(c); @@ -560,10 +605,6 @@ int bc_parser::prepare_ir() { dep->move(c->parent->first, c); c->replace_with(dep); sh->simplify_dep_rep(dep); - } else if (flags & CF_ALU && ctx.is_cayman()) { - // postprocess cayman's 3-slot instructions (ex-trans-only) - // FIXME it shouldn't be required with proper handling - prepare_alu_clause(c); } else if (flags & CF_EXP) { // unroll burst exports @@ -735,40 +776,5 @@ int bc_parser::prepare_if(cf_node* c) { return 0; } -int bc_parser::prepare_alu_clause(cf_node* c) { - - // loop over alu groups - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - assert(I->subtype == NST_ALU_GROUP); - - alu_group_node *g = static_cast<alu_group_node*>(*I); - - // loop over alu_group items - for (node_iterator I2 = g->begin(), E2 = g->end(); I2 != E2; ++I2) { - if (I2->subtype != NST_ALU_PACKED_INST) - continue; - - alu_packed_node *p = static_cast<alu_packed_node*>(*I2); - - if (p->count() == 3) { - // cayman's scalar instruction that takes 3 or 4 slots - - // FIXME for simplicity we'll always add 4th slot, - // but probably we might want to always remove 4th slot and make - // sure that regalloc won't choose w component for dst - - alu_node *f = static_cast<alu_node*>(p->first); - alu_node *a = sh->create_alu(); - a->src = f->src; - a->dst.resize(f->dst.size()); - a->bc = f->bc; - a->bc.slot = SLOT_W; - p->push_back(a); - } - } - } - - return 0; -} } // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp index b919fa419d4..17a8b878e0f 100644 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/src/gallium/drivers/r600/sb/sb_core.cpp @@ -94,7 +94,7 @@ void r600_sb_context_destroy(void * sctx) { int r600_sb_bytecode_process(struct r600_context *rctx, struct r600_bytecode *bc, struct r600_shader *pshader, - int dump_source_bytecode, + int dump_bytecode, int optimize) { int r = 0; unsigned shader_id = bc->debug_id; @@ -111,13 +111,29 @@ int r600_sb_bytecode_process(struct r600_context *rctx, SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; ); - bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize); + bc_parser parser(*ctx, bc, pshader); - if ((r = parser.parse())) { - assert(0); + if ((r = parser.decode())) { + assert(!"sb: bytecode decoding error"); return r; } + shader *sh = parser.get_shader(); + + if (dump_bytecode) { + bc_dump(*sh, cerr, bc->bytecode, bc->ndw).run(); + } + + if (!optimize) { + delete sh; + return 0; + } + + if (sh->target != TARGET_FETCH) { + sh->src_stats.ndw = bc->ndw; + sh->collect_stats(false); + } + /* skip some shaders (use shaders from default backend) * dskip_start - range start, dskip_end - range_end, * e.g. start = 5, end = 6 means shaders 5 & 6 @@ -138,14 +154,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx, } } - shader *sh = parser.get_shader(); - SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); ); - - if (!optimize) { - delete sh; - return 0; + if ((r = parser.prepare())) { + assert(!"sb: bytecode parsing error"); + return r; } + SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); ); + #define SB_RUN_PASS(n, dump) \ do { \ r = n(*sh).run(); \ @@ -222,8 +237,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx, return r; } + bytecode &nbc = builder.get_bytecode(); + + if (dump_bytecode) { + bc_dump(*sh, cerr, &nbc).run(); + } + if (!sb_context::dry_run) { - bytecode &nbc = builder.get_bytecode(); free(bc->bytecode); bc->ndw = nbc.ndw(); @@ -233,10 +253,9 @@ int r600_sb_bytecode_process(struct r600_context *rctx, bc->ngpr = sh->ngpr; bc->nstack = sh->nstack; } else { - SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; ); + SB_DUMP_STAT( cerr << "sb: dry run: optimized bytecode is not used\n"; ); } - if (sb_context::dump_stat) { int64_t t = os_time_get_nano() - time_start; diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp index 5944ba66f48..f0665efb2ca 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.cpp +++ b/src/gallium/drivers/r600/sb/sb_shader.cpp @@ -33,11 +33,11 @@ namespace r600_sb { using std::cerr; -shader::shader(sb_context &sctx, shader_target t, unsigned id, bool dump) +shader::shader(sb_context &sctx, shader_target t, unsigned id) : ctx(sctx), next_temp_value_index(temp_regid_offset), prep_regs_count(), pred_sels(), regions(), inputs(), undef(), val_pool(sizeof(value)), - pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump), + pool(), all_nodes(), src_stats(), opt_stats(), errors(), optimized(), id(id), coal(*this), bbs(), target(t), vt(ex), ex(*this), root(), diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h index b2e3837c4c0..5362e395e97 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.h +++ b/src/gallium/drivers/r600/sb/sb_shader.h @@ -271,7 +271,6 @@ public: error_map errors; - bool enable_dump; bool optimized; unsigned id; @@ -296,7 +295,7 @@ public: unsigned ngpr, nstack; - shader(sb_context &sctx, shader_target t, unsigned id, bool dump); + shader(sb_context &sctx, shader_target t, unsigned id); ~shader(); |