From afde9294b54c60212d12292d21dd5a4f067c4e8b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 31 Mar 2018 14:36:37 -0400 Subject: freedreno/ir3: fix issue w/ glamor composite shaders Fixes an issue that became possible when we started lowering phi webs to regs (a7ea2b4e) (although was not really seen until we also switched to using peephole select pass (ec8bc54a) instead of lowering *all* if/else to select). If texture coord (or anything else that uses create_collect() to collect scalar values in a sequence of scalar registers) was consuming a value produced on either side of an if/else (ie. a phi lowered to nir reg, which in ir3 is an "array" of length 1) then register allocation would happen incorrectly and we'd end up sampling from garbage coordinates. Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 35 ++++++++++++++++++++-- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 3 ++ 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/freedreno/ir3') diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 62342be2bbb..3d3c85dd03b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -686,8 +686,39 @@ create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); ir3_reg_create(collect, 0, flags); /* dst */ for (unsigned i = 0; i < arrsz; i++) { - compile_assert(ctx, (arr[i]->regs[0]->flags & IR3_REG_HALF) == flags); - ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = arr[i]; + struct ir3_instruction *elem = arr[i]; + + /* Since arrays are pre-colored in RA, we can't assume that + * things will end up in the right place. (Ie. if a collect + * joins elements from two different arrays.) So insert an + * extra mov. + * + * We could possibly skip this if all the collected elements + * are contiguous elements in a single array.. not sure how + * likely that is to happen. + * + * Fixes a problem with glamor shaders, that in effect do + * something like: + * + * if (foo) + * texcoord = .. + * else + * texcoord = .. + * color = texture2D(tex, texcoord); + * + * In this case, texcoord will end up as nir registers (which + * translate to ir3 array's of length 1. And we can't assume + * the two (or more) arrays will get allocated in consecutive + * scalar registers. + * + */ + if (elem->regs[0]->flags & IR3_REG_ARRAY) { + type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; + elem = ir3_MOV(block, elem, type); + } + + compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags); + ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem; } return collect; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 67a7714b9c5..e3a3a9db690 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -59,6 +59,9 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) if (src->flags & IR3_REG_RELATIV) return false; + if (src->flags & IR3_REG_ARRAY) + return false; + if (!allow_flags) if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) -- cgit v1.2.3