diff options
author | Rob Clark <[email protected]> | 2015-06-13 09:14:31 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2015-06-21 08:01:12 -0400 |
commit | 66a93a0ff9aa402c37aa9d00b4489715d611b496 (patch) | |
tree | f2c86866aa26482b2ebbb0bd04794386ae9b561c | |
parent | 1ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2 (diff) |
freedreno/ir3: pass sz to split_dest()
For query_levels, we generate a getinfo with writemask of (z), which RA
will consider as size==3. But we were still generating four fanouts.
Which meant that RA would see it as two different register classes,
depending on the path to definer. Ie. on the getinfo instruction itself
it would see size==3, but when chasing back through the fanouts it would
see size==4.
Easiest way to solve that is to just generate the chain of neighboring
fanouts to have the correct size in the first place.
Note: we may eventually want split_dest() to take start/end or wrmask
instead, since really we only need size==1. But RA is not clever enough
for that, query_levels is not that common, and the other two registers
that get allocated are never used so those register slots can be
immediately re-used. So bunch of work for probably no real gain.
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 2 |
2 files changed, 7 insertions, 5 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 0c2600b8100..48b1d8f3606 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -822,10 +822,10 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp) */ static void split_dest(struct ir3_block *block, struct ir3_instruction **dst, - struct ir3_instruction *src) + struct ir3_instruction *src, unsigned n) { struct ir3_instruction *prev = NULL; - for (int i = 0, j = 0; i < 4; i++) { + for (int i = 0, j = 0; i < n; i++) { struct ir3_instruction *split = ir3_instr_create(block, -1, OPC_META_FO); ir3_reg_create(split, 0, IR3_REG_SSA); @@ -1699,7 +1699,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) create_collect(b, src0, nsrc0), create_collect(b, src1, nsrc1)); - split_dest(b, dst, sam); + split_dest(b, dst, sam, 4); } static void @@ -1716,7 +1716,7 @@ emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex) /* even though there is only one component, since it ends * up in .z rather than .x, we need a split_dest() */ - split_dest(b, dst, sam); + split_dest(b, dst, sam, 3); /* The # of levels comes from getinfo.z. We need to add 1 to it, since * the value in TEX_CONST_0 is zero-based. @@ -1744,7 +1744,7 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, tex->sampler_index, tex->sampler_index, lod, NULL); - split_dest(b, dst, sam); + split_dest(b, dst, sam, 4); /* Array size actually ends up in .w rather than .z. This doesn't * matter for miplevel 0, but for higher mips the value in z is diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 359cd9a0d5d..e5aba859fab 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -424,6 +424,8 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) *sz = MAX2(*sz, dsz); + /* Fanout's are grouped, so *off should already valid */ + d = dd; } |