summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Karol Herbst <[email protected]> 2018-04-22 22:23:13 +0200
committer: Karol Herbst <[email protected]> 2018-04-24 22:31:59 +0200
commit: e4f675dc42887734b43b549784955e81d284b202 (patch)
tree: 005d8e86ea619176c7d62082ff82061c104a2267
parent: 0d5ce25c1ca23abc6d91538f4374a18509091060 (diff)
gm107/ir/lib: fix sched in div u32 builtin
Imad needs to set a read barrier. With significantly large work groups I was getting wrong results for div u32. It turns out the issue was with the sched opcodes.

Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/lib/gm107.asm 4
-rw-r--r-- src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h 4
2 files changed, 4 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm
index 90741b6c59f..7ee5f8fc65b 100644
--- a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm
+++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm
@@ -27,11 +27,11 @@ gm107_div_u32:
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
- sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1)
+ sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 rd 0x1 wt 0x1)
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
- sched (st 0x6) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2)
+ sched (st 0x6 wt 0x2) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2)
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
i2i u32 u32 $r2 neg $r1
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h
index 8708a94b0a6..65c93f7ae89 100644
--- a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h
+++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h
@@ -16,11 +16,11 @@ uint64_t gm107_builtin_code[] = {
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
- 0x003c1801e0c00f06,
+ 0x00241801e0c00f06,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
- 0x00443c0120c007e6,
+ 0x00443c0120c017e6,
0x5c98078000070003,
0x5c38008000270000,
0x5ce0200000170a02,