summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2014-10-18 15:28:16 -0400
committerRob Clark <[email protected]>2014-10-20 21:42:44 -0400
commit8a0ffedd8de51eaf980855283c4525dba6dc5847 (patch)
tree91b1a9e527826679fac39959538678896a4c4328
parentab33a240890a7ef147d4b8cf35c27ae1932a1dbe (diff)
freedreno/ir3: fix potential gpu lockup with kill
It seems like the hardware is unhappy if we execute a kill instruction prior to last input (ei). Probably the shader thread stops executing and the end-input flag is never set. Signed-off-by: Rob Clark <[email protected]>
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.c11
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h10
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_depth.c16
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_sched.c26
4 files changed, 61 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 70d37ffdeb5..60d4e4a15d5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -81,6 +81,8 @@ void ir3_destroy(struct ir3 *shader)
shader->chunk = chunk->next;
free(chunk);
}
+ free(shader->instrs);
+ free(shader->baryfs);
free(shader);
}
@@ -596,6 +598,15 @@ static void insert_instr(struct ir3 *shader,
shader->instrs_sz * sizeof(shader->instrs[0]));
}
shader->instrs[shader->instrs_count++] = instr;
+
+ if (is_input(instr)) {
+ if (shader->baryfs_count == shader->baryfs_sz) {
+ shader->baryfs_sz = MAX2(2 * shader->baryfs_sz, 16);
+ shader->baryfs = realloc(shader->baryfs,
+ shader->baryfs_sz * sizeof(shader->baryfs[0]));
+ }
+ shader->baryfs[shader->baryfs_count++] = instr;
+ }
}
struct ir3_block * ir3_block_create(struct ir3 *shader,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index d2d3dcaadb9..21992f68ced 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -210,7 +210,11 @@ struct ir3_instruction {
* result of moving a const to a reg would have a low cost, so to
* it could make sense to duplicate the instruction at various
* points where the result is needed to reduce register footprint.
+ *
+ * DEPTH_UNUSED used to mark unused instructions after depth
+ * calculation pass.
*/
+#define DEPTH_UNUSED ~0
unsigned depth;
};
struct ir3_instruction *next;
@@ -224,6 +228,8 @@ struct ir3_heap_chunk;
struct ir3 {
unsigned instrs_count, instrs_sz;
struct ir3_instruction **instrs;
+ unsigned baryfs_count, baryfs_sz;
+ struct ir3_instruction **baryfs;
unsigned heap_idx;
struct ir3_heap_chunk *chunk;
};
@@ -272,6 +278,10 @@ static inline void ir3_clear_mark(struct ir3 *shader)
/* TODO would be nice to drop the instruction array.. for
* new compiler, _clear_mark() is all we use it for, and
* we could probably manage a linked list instead..
+ *
+ * Also, we'll probably want to mark instructions within
+ * a block, so tracking the list of instrs globally is
+ * unlikely to be what we want.
*/
unsigned i;
for (i = 0; i < shader->instrs_count; i++) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index dcc0362f0c8..76413d41589 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -150,10 +150,22 @@ void ir3_block_depth(struct ir3_block *block)
if (block->outputs[i])
ir3_instr_depth(block->outputs[i]);
- /* at this point, any unvisited input is unused: */
+ /* mark un-used instructions: */
+ for (i = 0; i < block->shader->instrs_count; i++) {
+ struct ir3_instruction *instr = block->shader->instrs[i];
+
+ /* just consider instructions within this block: */
+ if (instr->block != block)
+ continue;
+
+ if (!ir3_instr_check_mark(instr))
+ instr->depth = DEPTH_UNUSED;
+ }
+
+ /* cleanup unused inputs: */
for (i = 0; i < block->ninputs; i++) {
struct ir3_instruction *in = block->inputs[i];
- if (in && !ir3_instr_check_mark(in))
+ if (in && (in->depth == DEPTH_UNUSED))
block->inputs[i] = NULL;
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
index 24d7c6397c1..b2ef8111f56 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -242,6 +242,32 @@ static int trysched(struct ir3_sched_ctx *ctx,
if (delay)
return delay;
+ /* if the instruction is a kill, we need to ensure *every*
+ * bary.f is scheduled. The hw seems unhappy if the thread
+ * gets killed before the end-input (ei) flag is hit.
+ *
+ * We could do this by adding each bary.f instruction as
+ * virtual ssa src for the kill instruction. But we have
+ * fixed length instr->regs[].
+ *
+ * TODO this wouldn't be quite right if we had multiple
+ * basic blocks, if any block was conditional. We'd need
+ * to schedule the bary.f's outside of any block which
+ * was conditional that contained a kill.. I think..
+ */
+ if (is_kill(instr)) {
+ struct ir3 *ir = instr->block->shader;
+ unsigned i;
+
+ for (i = 0; i < ir->baryfs_count; i++) {
+ if (ir->baryfs[i]->depth == DEPTH_UNUSED)
+ continue;
+ delay = trysched(ctx, ir->baryfs[i]);
+ if (delay)
+ return delay;
+ }
+ }
+
/* if this is a write to address/predicate register, and that
* register is currently in use, we need to defer until it is
* free: