diff options
author | Karol Herbst <[email protected]> | 2018-04-22 22:23:13 +0200 |
---|---|---|
committer | Karol Herbst <[email protected]> | 2018-04-24 22:31:59 +0200 |
commit | e4f675dc42887734b43b549784955e81d284b202 (patch) | |
tree | 005d8e86ea619176c7d62082ff82061c104a2267 /src | |
parent | 0d5ce25c1ca23abc6d91538f4374a18509091060 (diff) |
gm107/ir/lib: fix sched in div u32 builtin
Imad needs to set a read barrier.
With significantly large work groups I was getting wrong results for div u32. It
turns out the issue was with the sched opcodes.
Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/lib/gm107.asm | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h | 4 |
2 files changed, 4 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm index 90741b6c59f..7ee5f8fc65b 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm @@ -27,11 +27,11 @@ gm107_div_u32: imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 - sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) + sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 rd 0x1 wt 0x1) imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 - sched (st 0x6) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2) + sched (st 0x6 wt 0x2) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2) mov $r3 $r0 0xf imul u32 u32 hi $r0 $r0 $r2 i2i u32 u32 $r2 neg $r1 diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h index 8708a94b0a6..65c93f7ae89 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h @@ -16,11 +16,11 @@ uint64_t gm107_builtin_code[] = { 0x5c38000000270103, 0x5a40010000370202, 0x5c38000000270103, - 0x003c1801e0c00f06, + 0x00241801e0c00f06, 0x5a40010000370202, 0x5c38000000270103, 0x5a40010000370202, - 0x00443c0120c007e6, + 0x00443c0120c017e6, 0x5c98078000070003, 0x5c38008000270000, 0x5ce0200000170a02, |