summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorHans de Goede <[email protected]>2016-04-21 15:43:51 +0200
committerHans de Goede <[email protected]>2016-04-25 11:45:07 +0200
commit787a53988cc6bb7a0f2b43c216837d683336b33f (patch)
treed686b44932ed464e6865a4cb231bec98588ba482 /src
parent0831eb94b9adde3f1542ca75a1f80dbe165b8f31 (diff)
nouveau: codegen: combineLd/St do not combine indirect loads
combineLd/St would combine, i.e. : st u32 # g[$r2+0x0] $r2 st u32 # g[$r2+0x4] $r3 into: st u64 # g[$r2+0x0] $r2d But this is only valid if r2 contains an 8 byte aligned address, which is not guaranteed for compute shaders This commit checks for src0 dim 0 not being indirect when combining loads / stores as combining indirect loads / stores may break alignment rules. Signed-off-by: Hans de Goede <[email protected]> Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp7
1 files changed, 7 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index fea388685fa..e62ac064d59 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2203,6 +2203,9 @@ MemoryOpt::combineLd(Record *rec, Instruction *ld)
if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
return false;
+ // for compute indirect loads are not guaranteed to be aligned
+ if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
+ return false;
assert(sizeRc + sizeLd <= 16 && offRc != offLd);
@@ -2255,8 +2258,12 @@ MemoryOpt::combineSt(Record *rec, Instruction *st)
if (!prog->getTarget()->
isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size)))
return false;
+ // no unaligned stores
if (size == 8 && MIN2(offRc, offSt) & 0x7)
return false;
+ // for compute indirect stores are not guaranteed to be aligned
+ if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
+ return false;
st->takeExtraSources(0, extra); // save predicate and indirect address