diff options
author | Rob Clark <[email protected]> | 2014-10-18 15:28:16 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-10-20 21:42:44 -0400 |
commit | 8a0ffedd8de51eaf980855283c4525dba6dc5847 (patch) | |
tree | 91b1a9e527826679fac39959538678896a4c4328 | |
parent | ab33a240890a7ef147d4b8cf35c27ae1932a1dbe (diff) |
freedreno/ir3: fix potential gpu lockup with kill
It seems like the hardware is unhappy if we execute a kill instruction
prior to last input (ei). Probably the shader thread stops executing
and the end-input flag is never set.
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_depth.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_sched.c | 26 |
4 files changed, 61 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 70d37ffdeb5..60d4e4a15d5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -81,6 +81,8 @@ void ir3_destroy(struct ir3 *shader) shader->chunk = chunk->next; free(chunk); } + free(shader->instrs); + free(shader->baryfs); free(shader); } @@ -596,6 +598,15 @@ static void insert_instr(struct ir3 *shader, shader->instrs_sz * sizeof(shader->instrs[0])); } shader->instrs[shader->instrs_count++] = instr; + + if (is_input(instr)) { + if (shader->baryfs_count == shader->baryfs_sz) { + shader->baryfs_sz = MAX2(2 * shader->baryfs_sz, 16); + shader->baryfs = realloc(shader->baryfs, + shader->baryfs_sz * sizeof(shader->baryfs[0])); + } + shader->baryfs[shader->baryfs_count++] = instr; + } } struct ir3_block * ir3_block_create(struct ir3 *shader, diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index d2d3dcaadb9..21992f68ced 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -210,7 +210,11 @@ struct ir3_instruction { * result of moving a const to a reg would have a low cost, so to * it could make sense to duplicate the instruction at various * points where the result is needed to reduce register footprint. + * + * DEPTH_UNUSED used to mark unused instructions after depth + * calculation pass. */ +#define DEPTH_UNUSED ~0 unsigned depth; }; struct ir3_instruction *next; @@ -224,6 +228,8 @@ struct ir3_heap_chunk; struct ir3 { unsigned instrs_count, instrs_sz; struct ir3_instruction **instrs; + unsigned baryfs_count, baryfs_sz; + struct ir3_instruction **baryfs; unsigned heap_idx; struct ir3_heap_chunk *chunk; }; @@ -272,6 +278,10 @@ static inline void ir3_clear_mark(struct ir3 *shader) /* TODO would be nice to drop the instruction array.. for * new compiler, _clear_mark() is all we use it for, and * we could probably manage a linked list instead.. + * + * Also, we'll probably want to mark instructions within + * a block, so tracking the list of instrs globally is + * unlikely to be what we want. */ unsigned i; for (i = 0; i < shader->instrs_count; i++) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index dcc0362f0c8..76413d41589 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -150,10 +150,22 @@ void ir3_block_depth(struct ir3_block *block) if (block->outputs[i]) ir3_instr_depth(block->outputs[i]); - /* at this point, any unvisited input is unused: */ + /* mark un-used instructions: */ + for (i = 0; i < block->shader->instrs_count; i++) { + struct ir3_instruction *instr = block->shader->instrs[i]; + + /* just consider instructions within this block: */ + if (instr->block != block) + continue; + + if (!ir3_instr_check_mark(instr)) + instr->depth = DEPTH_UNUSED; + } + + /* cleanup unused inputs: */ for (i = 0; i < block->ninputs; i++) { struct ir3_instruction *in = block->inputs[i]; - if (in && !ir3_instr_check_mark(in)) + if (in && (in->depth == DEPTH_UNUSED)) block->inputs[i] = NULL; } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 24d7c6397c1..b2ef8111f56 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -242,6 +242,32 @@ static int trysched(struct ir3_sched_ctx *ctx, if (delay) return delay; + /* if the instruction is a kill, we need to ensure *every* + * bary.f is scheduled. The hw seems unhappy if the thread + * gets killed before the end-input (ei) flag is hit. + * + * We could do this by adding each bary.f instruction as + * virtual ssa src for the kill instruction. But we have + * fixed length instr->regs[]. + * + * TODO this wouldn't be quite right if we had multiple + * basic blocks, if any block was conditional. We'd need + * to schedule the bary.f's outside of any block which + * was conditional that contained a kill.. I think.. + */ + if (is_kill(instr)) { + struct ir3 *ir = instr->block->shader; + unsigned i; + + for (i = 0; i < ir->baryfs_count; i++) { + if (ir->baryfs[i]->depth == DEPTH_UNUSED) + continue; + delay = trysched(ctx, ir->baryfs[i]); + if (delay) + return delay; + } + } + /* if this is a write to address/predicate register, and that * register is currently in use, we need to defer until it is * free: |