summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc.h 27
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_builder.cpp 4
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_parser.cpp 224
-rw-r--r-- src/gallium/drivers/r600/sb/sb_core.cpp 45
-rw-r--r-- src/gallium/drivers/r600/sb/sb_shader.cpp 4
-rw-r--r-- src/gallium/drivers/r600/sb/sb_shader.h 3
6 files changed, 163 insertions, 144 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index 9c6ed46d051..9f65098848d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -674,40 +674,39 @@ class bc_parser {
typedef std::stack<region_node*> region_stack;
region_stack loop_stack;
- int enable_dump;
- int optimize;
-
public:
- bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader,
- int dump_source, int optimize) :
+ bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
ctx(sctx), dec(), bc(bc), pshader(pshader),
dw(), bc_ndw(), max_cf(),
sh(), error(), slots(), cgroup(),
- cf_map(), loop_stack(), enable_dump(dump_source),
- optimize(optimize) { }
+ cf_map(), loop_stack() { }
- int parse();
+ int decode();
+ int prepare();
shader* get_shader() { assert(!error); return sh; }
private:
- int parse_shader();
+ int decode_shader();
int parse_decls();
- int parse_cf(unsigned &i, bool &eop);
+ int decode_cf(unsigned &i, bool &eop);
- int parse_alu_clause(cf_node *cf);
- int parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
+ int decode_alu_clause(cf_node *cf);
+ int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
- int parse_fetch_clause(cf_node *cf);
+ int decode_fetch_clause(cf_node *cf);
int prepare_ir();
+ int prepare_alu_clause(cf_node *cf);
+ int prepare_alu_group(cf_node* cf, alu_group_node *g);
+ int prepare_fetch_clause(cf_node *cf);
+
int prepare_loop(cf_node *c);
int prepare_if(cf_node *c);
- int prepare_alu_clause(cf_node *c);
};
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index b0c2e41c33f..f40e4694b8d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -94,10 +94,6 @@ int bc_builder::build() {
cf_pos = bb.get_pos();
}
- if (sh.enable_dump) {
- bc_dump(sh, cerr, &bb).run();
- }
-
return 0;
}
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 83292874d6b..9f3ecc51cd9 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -47,7 +47,7 @@ namespace r600_sb {
using std::cerr;
-int bc_parser::parse() {
+int bc_parser::decode() {
dw = bc->bytecode;
bc_ndw = bc->ndw;
@@ -71,47 +71,27 @@ int bc_parser::parse() {
t = TARGET_FETCH;
}
- sh = new shader(ctx, t, bc->debug_id, enable_dump);
- int r = parse_shader();
+ sh = new shader(ctx, t, bc->debug_id);
+ int r = decode_shader();
delete dec;
- if (r)
- return r;
-
sh->ngpr = bc->ngpr;
sh->nstack = bc->nstack;
- if (sh->target != TARGET_FETCH) {
- sh->src_stats.ndw = bc->ndw;
- sh->collect_stats(false);
- }
-
- if (enable_dump) {
- bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run();
- }
-
- if (!optimize)
- return 0;
-
- prepare_ir();
-
return r;
}
-int bc_parser::parse_shader() {
+int bc_parser::decode_shader() {
int r = 0;
unsigned i = 0;
bool eop = false;
sh->init();
- if (pshader)
- parse_decls();
-
do {
eop = false;
- if ((r = parse_cf(i, eop)))
+ if ((r = decode_cf(i, eop)))
return r;
} while (!eop || (i >> 1) <= max_cf);
@@ -119,34 +99,34 @@ int bc_parser::parse_shader() {
return 0;
}
-int bc_parser::parse_decls() {
-
-// sh->prepare_regs(rs.bc.ngpr);
-
- if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
+int bc_parser::prepare() {
+ int r = 0;
+ if ((r = parse_decls()))
+ return r;
+ if ((r = prepare_ir()))
+ return r;
+ return 0;
+}
-#if SB_NO_ARRAY_INFO
+int bc_parser::parse_decls() {
+ if (!pshader) {
sh->add_gpr_array(0, bc->ngpr, 0x0F); // pshader is null on this path, so take ngpr from the parser's own bytecode
+ return 0;
+ }
-#else
+ if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
assert(pshader->num_arrays);
if (pshader->num_arrays) {
-
for (unsigned i = 0; i < pshader->num_arrays; ++i) {
r600_shader_array &a = pshader->arrays[i];
sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
}
-
} else {
sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
}
-
-
-#endif
-
}
if (sh->target == TARGET_VS)
@@ -183,12 +163,10 @@ int bc_parser::parse_decls() {
}
}
-
return 0;
}
-
-int bc_parser::parse_cf(unsigned &i, bool &eop) {
+int bc_parser::decode_cf(unsigned &i, bool &eop) {
int r;
@@ -210,18 +188,15 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
if (flags & CF_ALU) {
- if ((r = parse_alu_clause(cf)))
+ if ((r = decode_alu_clause(cf)))
return r;
} else if (flags & CF_FETCH) {
- if ((r = parse_fetch_clause(cf)))
+ if ((r = decode_fetch_clause(cf)))
return r;;
} else if (flags & CF_EXP) {
assert(!cf->bc.rw_rel);
} else if (flags & (CF_STRM | CF_RAT)) {
assert(!cf->bc.rw_rel);
- } else if (cf->bc.op == CF_OP_CALL_FS) {
- sh->init_call_fs(cf);
- cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
} else if (flags & CF_BRANCH) {
if (cf->bc.addr > max_cf)
max_cf = cf->bc.addr;
@@ -232,7 +207,7 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
return 0;
}
-int bc_parser::parse_alu_clause(cf_node* cf) {
+int bc_parser::decode_alu_clause(cf_node* cf) {
unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
cf->subtype = NST_ALU_CLAUSE;
@@ -243,7 +218,7 @@ int bc_parser::parse_alu_clause(cf_node* cf) {
unsigned ng = 0;
do {
- parse_alu_group(cf, i, gcnt);
+ decode_alu_group(cf, i, gcnt);
assert(gcnt <= cnt);
cnt -= gcnt;
ng++;
@@ -252,16 +227,17 @@ int bc_parser::parse_alu_clause(cf_node* cf) {
return 0;
}
-int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
+int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
int r;
alu_node *n;
alu_group_node *g = sh->create_alu_group();
cgroup = !cgroup;
memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
-
gcnt = 0;
+ unsigned literal_mask = 0;
+
do {
n = sh->create_alu();
g->push_back(n);
@@ -280,11 +256,62 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
assert(n->bc.last);
- unsigned literal_mask = 0;
+ for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
+ n = static_cast<alu_node*>(*I);
+
+ for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
+ bc_alu_src &src = n->bc.src[k];
+ if (src.sel == ALU_SRC_LITERAL) {
+ literal_mask |= (1 << src.chan);
+ src.value.u = dw[i + src.chan];
+ }
+ }
+ }
+
+ unsigned literal_ndw = 0;
+ while (literal_mask) {
+ g->literals.push_back(dw[i + literal_ndw]);
+ literal_ndw += 1;
+ literal_mask >>= 1;
+ }
+
+ literal_ndw = (literal_ndw + 1) & ~1u;
+
+ i += literal_ndw;
+ gcnt += literal_ndw >> 1;
+
+ cf->push_back(g);
+ return 0;
+}
+
+int bc_parser::prepare_alu_clause(cf_node* cf) {
+ // Prepare each ALU group in the clause; returns 0, or the first nonzero
+ // status from prepare_alu_group() (it returns -1 when slot assignment fails).
+ for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+ assert(I->subtype == NST_ALU_GROUP);
+ alu_group_node *g = static_cast<alu_group_node*>(*I);
+ if (int r = prepare_alu_group(cf, g))
+ return r;
+ }
+ return 0;
+}
+
+int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
+
+ alu_node *n;
+
+ cgroup = !cgroup;
+ memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
for (node_iterator I = g->begin(), E = g->end();
I != E; ++I) {
n = static_cast<alu_node*>(*I);
+
+ if (!sh->assign_slot(n, slots[cgroup])) {
+ assert(!"alu slot assignment failed");
+ return -1;
+ }
+
unsigned src_count = n->bc.op_ptr->src_count;
if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
@@ -340,10 +367,6 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
bc_alu_src &src = n->bc.src[s];
if (src.sel == ALU_SRC_LITERAL) {
- unsigned chan = src.chan;
-
- literal_mask |= (1 << chan);
- src.value.u = dw[i+chan];
n->src[s] = sh->get_const_value(src.value);
} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
@@ -430,38 +453,52 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
if (p) {
g->push_front(p);
- }
- unsigned literal_ndw = 0;
- while (literal_mask) {
- g->literals.push_back(dw[i + literal_ndw]);
- literal_ndw += 1;
- literal_mask >>= 1;
- }
+ if (p->count() == 3 && ctx.is_cayman()) {
+ // cayman's scalar instruction that can use 3 or 4 slots
- literal_ndw = (literal_ndw + 1) & ~1u;
+ // FIXME for simplicity we'll always add 4th slot,
+ // but probably we might want to always remove 4th slot and make
+ // sure that regalloc won't choose 'w' component for dst
- i += literal_ndw;
- gcnt += literal_ndw >> 1;
+ alu_node *f = static_cast<alu_node*>(p->first);
+ alu_node *a = sh->create_alu();
+ a->src = f->src;
+ a->dst.resize(f->dst.size());
+ a->bc = f->bc;
+ a->bc.slot = SLOT_W;
+ p->push_back(a);
+ }
+ }
- cf->push_back(g);
return 0;
}
-int bc_parser::parse_fetch_clause(cf_node* cf) {
+int bc_parser::decode_fetch_clause(cf_node* cf) {
int r;
unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
cf->subtype = NST_TEX_CLAUSE;
- vvec grad_v, grad_h;
-
while (cnt--) {
fetch_node *n = sh->create_fetch();
cf->push_back(n);
if ((r = dec->decode_fetch(i, n->bc)))
return r;
+ }
+ return 0;
+}
+
+int bc_parser::prepare_fetch_clause(cf_node *cf) {
+
+ vvec grad_v, grad_h;
+
+ for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+
+ fetch_node *n = static_cast<fetch_node*>(*I);
+ assert(n->is_valid());
+
unsigned flags = n->bc.op_ptr->flags;
unsigned vtx = flags & FF_VTX;
@@ -527,6 +564,7 @@ int bc_parser::parse_fetch_clause(cf_node* cf) {
}
}
+
return 0;
}
@@ -540,7 +578,14 @@ int bc_parser::prepare_ir() {
unsigned flags = c->bc.op_ptr->flags;
- if (flags & CF_LOOP_START) {
+ if (flags & CF_ALU) {
+ prepare_alu_clause(c);
+ } else if (flags & CF_FETCH) {
+ prepare_fetch_clause(c);
+ } else if (c->bc.op == CF_OP_CALL_FS) {
+ sh->init_call_fs(c);
+ c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
+ } else if (flags & CF_LOOP_START) {
prepare_loop(c);
} else if (c->bc.op == CF_OP_JUMP) {
prepare_if(c);
@@ -560,10 +605,6 @@ int bc_parser::prepare_ir() {
dep->move(c->parent->first, c);
c->replace_with(dep);
sh->simplify_dep_rep(dep);
- } else if (flags & CF_ALU && ctx.is_cayman()) {
- // postprocess cayman's 3-slot instructions (ex-trans-only)
- // FIXME it shouldn't be required with proper handling
- prepare_alu_clause(c);
} else if (flags & CF_EXP) {
// unroll burst exports
@@ -735,40 +776,5 @@ int bc_parser::prepare_if(cf_node* c) {
return 0;
}
-int bc_parser::prepare_alu_clause(cf_node* c) {
-
- // loop over alu groups
- for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
- assert(I->subtype == NST_ALU_GROUP);
-
- alu_group_node *g = static_cast<alu_group_node*>(*I);
-
- // loop over alu_group items
- for (node_iterator I2 = g->begin(), E2 = g->end(); I2 != E2; ++I2) {
- if (I2->subtype != NST_ALU_PACKED_INST)
- continue;
-
- alu_packed_node *p = static_cast<alu_packed_node*>(*I2);
-
- if (p->count() == 3) {
- // cayman's scalar instruction that takes 3 or 4 slots
-
- // FIXME for simplicity we'll always add 4th slot,
- // but probably we might want to always remove 4th slot and make
- // sure that regalloc won't choose w component for dst
-
- alu_node *f = static_cast<alu_node*>(p->first);
- alu_node *a = sh->create_alu();
- a->src = f->src;
- a->dst.resize(f->dst.size());
- a->bc = f->bc;
- a->bc.slot = SLOT_W;
- p->push_back(a);
- }
- }
- }
-
- return 0;
-}
} // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index b919fa419d4..17a8b878e0f 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -94,7 +94,7 @@ void r600_sb_context_destroy(void * sctx) {
int r600_sb_bytecode_process(struct r600_context *rctx,
struct r600_bytecode *bc,
struct r600_shader *pshader,
- int dump_source_bytecode,
+ int dump_bytecode,
int optimize) {
int r = 0;
unsigned shader_id = bc->debug_id;
@@ -111,13 +111,29 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; );
- bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize);
+ bc_parser parser(*ctx, bc, pshader);
- if ((r = parser.parse())) {
- assert(0);
+ if ((r = parser.decode())) {
+ assert(!"sb: bytecode decoding error");
return r;
}
+ shader *sh = parser.get_shader();
+
+ if (dump_bytecode) {
+ bc_dump(*sh, cerr, bc->bytecode, bc->ndw).run();
+ }
+
+ if (!optimize) {
+ delete sh;
+ return 0;
+ }
+
+ if (sh->target != TARGET_FETCH) {
+ sh->src_stats.ndw = bc->ndw;
+ sh->collect_stats(false);
+ }
+
/* skip some shaders (use shaders from default backend)
* dskip_start - range start, dskip_end - range_end,
* e.g. start = 5, end = 6 means shaders 5 & 6
@@ -138,14 +154,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
}
}
- shader *sh = parser.get_shader();
- SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
-
- if (!optimize) {
- delete sh;
- return 0;
+ if ((r = parser.prepare())) {
+ assert(!"sb: bytecode parsing error");
+ return r;
}
+ SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
+
#define SB_RUN_PASS(n, dump) \
do { \
r = n(*sh).run(); \
@@ -222,8 +237,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
return r;
}
+ bytecode &nbc = builder.get_bytecode();
+
+ if (dump_bytecode) {
+ bc_dump(*sh, cerr, &nbc).run();
+ }
+
if (!sb_context::dry_run) {
- bytecode &nbc = builder.get_bytecode();
free(bc->bytecode);
bc->ndw = nbc.ndw();
@@ -233,10 +253,9 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
bc->ngpr = sh->ngpr;
bc->nstack = sh->nstack;
} else {
- SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
+ SB_DUMP_STAT( cerr << "sb: dry run: optimized bytecode is not used\n"; );
}
-
if (sb_context::dump_stat) {
int64_t t = os_time_get_nano() - time_start;
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 5944ba66f48..f0665efb2ca 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -33,11 +33,11 @@ namespace r600_sb {
using std::cerr;
-shader::shader(sb_context &sctx, shader_target t, unsigned id, bool dump)
+shader::shader(sb_context &sctx, shader_target t, unsigned id)
: ctx(sctx), next_temp_value_index(temp_regid_offset),
prep_regs_count(), pred_sels(),
regions(), inputs(), undef(), val_pool(sizeof(value)),
- pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump),
+ pool(), all_nodes(), src_stats(), opt_stats(), errors(),
optimized(), id(id),
coal(*this), bbs(),
target(t), vt(ex), ex(*this), root(),
diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h
index b2e3837c4c0..5362e395e97 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.h
+++ b/src/gallium/drivers/r600/sb/sb_shader.h
@@ -271,7 +271,6 @@ public:
error_map errors;
- bool enable_dump;
bool optimized;
unsigned id;
@@ -296,7 +295,7 @@ public:
unsigned ngpr, nstack;
- shader(sb_context &sctx, shader_target t, unsigned id, bool dump);
+ shader(sb_context &sctx, shader_target t, unsigned id);
~shader();