summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/codegen/lib
diff options
context:
space:
mode:
authorBen Skeggs <[email protected]>2014-05-09 15:56:05 +1000
committerBen Skeggs <[email protected]>2014-05-15 09:54:49 +1000
commitd548d47edf9f05e6dbf9656abc2f8e78d02cb2f6 (patch)
tree9427f4339be135386d41b160c1e1e38b7e80eb56 /src/gallium/drivers/nouveau/codegen/lib
parent7b9475fa652b9df6d599edbea8fa5049fdd995e1 (diff)
nvc0: add maxwell (sm50) compiler backend
The big missing part here is proper sched data calculations, but hopefully the chosen placeholder will be sufficient for now. Passes piglit as well as GK107 does. Signed-off-by: Ben Skeggs <[email protected]> Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/lib')
-rw-r--r--src/gallium/drivers/nouveau/codegen/lib/Makefile4
-rw-r--r--src/gallium/drivers/nouveau/codegen/lib/gm107.asm115
-rw-r--r--src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h97
3 files changed, 215 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/lib/Makefile b/src/gallium/drivers/nouveau/codegen/lib/Makefile
index 28a41a3f41e..06d1979d8b2 100644
--- a/src/gallium/drivers/nouveau/codegen/lib/Makefile
+++ b/src/gallium/drivers/nouveau/codegen/lib/Makefile
@@ -1,6 +1,6 @@
ENVYAS ?= envyas
-all: gf100.asm.h gk104.asm.h gk110.asm.h
+all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h
gf100.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@
@@ -8,3 +8,5 @@ gk104.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@
gk110.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mgk110 $< -o $@
+gm107.asm.h: %.asm.h: %.asm
+ $(ENVYAS) -a -W -mgm107 $< -o $@
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm
new file mode 100644
index 00000000000..758cc81a159
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm
@@ -0,0 +1,115 @@
+.section #gm107_builtin_code
+// DIV U32
+//
+// UNR recurrence (q = a / b):
+// look for z such that 2^32 - b <= b * z < 2^32
+// then q - 1 <= (a * z) / 2^32 <= q
+//
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p1
+// SIZE: 36 * 8 bytes (27 instructions + 9 sched words)
+//
+gm107_div_u32: // unsigned 32-bit divide: $r0 = a, $r1 = b on entry; $r0 = quotient, $r1 = remainder on return
+ sched 0x7e0 0x7e0 0x7e0 // one sched word precedes each group of three instructions; 0x7e0 is a placeholder, not computed scheduling data
+ flo u32 $r2 $r1 // presumably $r2 = bit index of the highest set bit of b -- confirm against the ISA
+ lop xor 1 $r2 $r2 0x1f // $r2 ^= 0x1f (equals 31 - $r2 for values in 0..31)
+ mov $r3 0x1 0xf // $r3 = 1
+ sched 0x7e0 0x7e0 0x7e0
+ shl $r2 $r3 $r2 // initial estimate z = 1 << $r2
+ i2i u32 u32 $r1 neg $r1 // $r1 = -b
+ imul u32 u32 $r3 $r1 $r2 // $r3 = low 32 bits of (-b * z)
+ sched 0x7e0 0x7e0 0x7e0
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 1 of 5: presumably z += high 32 bits of (z * $r3)
+ imul u32 u32 $r3 $r1 $r2 // recompute $r3 = -b * z with the refined z
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 2 of 5
+ sched 0x7e0 0x7e0 0x7e0
+ imul u32 u32 $r3 $r1 $r2
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 3 of 5
+ imul u32 u32 $r3 $r1 $r2
+ sched 0x7e0 0x7e0 0x7e0
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 4 of 5
+ imul u32 u32 $r3 $r1 $r2
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 5 of 5: $r2 now holds the final z
+ sched 0x7e0 0x7e0 0x7e0
+ mov $r3 $r0 0xf // keep a copy of the dividend a in $r3
+ imul u32 u32 hi $r0 $r0 $r2 // quotient estimate q = high 32 bits of (a * z)
+ i2i u32 u32 $r2 neg $r1 // $r2 = -(-b) = b
+ sched 0x7e0 0x7e0 0x7e0
+ imad u32 u32 $r1 $r1 $r0 $r3 // remainder r = (-b) * q + a
+ isetp ge u32 and $p0 1 $r1 $r2 1 // $p0 = (r >= b): the estimate was one too small
+ $p0 iadd $r1 $r1 neg $r2 // first correction: r -= b
+ sched 0x7e0 0x7e0 0x7e0
+ $p0 iadd $r0 $r0 0x1 // first correction: q += 1
+ $p0 isetp ge u32 and $p0 1 $r1 $r2 1 // re-test r >= b, only if the first correction ran
+ $p0 iadd $r1 $r1 neg $r2 // second correction: r -= b
+ sched 0x7e0 0x7e0 0x7e0
+ $p0 iadd $r0 $r0 0x1 // second correction: q += 1
+ ret
+ nop 0 // fills the last slot of the final three-instruction group
+
+// DIV S32, like DIV U32 after taking ABS(inputs)
+//
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p3
+//
+gm107_div_s32: // signed 32-bit divide: $r0 = a, $r1 = b on entry; $r0 = quotient, $r1 = remainder on return
+ sched 0x7e0 0x7e0 0x7e0 // one sched word precedes each group of three instructions; 0x7e0 is a placeholder, not computed scheduling data
+ isetp lt and $p2 0x1 $r0 0 1 // $p2 = (a < 0); used at the end to restore the remainder's sign
+ isetp lt xor $p3 1 $r1 0 $p2 // $p3 = (b < 0) xor $p2; used at the end to restore the quotient's sign
+ i2i s32 s32 $r0 abs $r0 // a = |a|
+ sched 0x7e0 0x7e0 0x7e0
+ i2i s32 s32 $r1 abs $r1 // b = |b|; from here on the sequence matches the unsigned routine
+ flo u32 $r2 $r1 // presumably $r2 = bit index of the highest set bit of b -- confirm against the ISA
+ lop xor 1 $r2 $r2 0x1f // $r2 ^= 0x1f (equals 31 - $r2 for values in 0..31)
+ sched 0x7e0 0x7e0 0x7e0
+ mov $r3 0x1 0xf // $r3 = 1
+ shl $r2 $r3 $r2 // initial estimate z = 1 << $r2
+ i2i u32 u32 $r1 neg $r1 // $r1 = -b
+ sched 0x7e0 0x7e0 0x7e0
+ imul u32 u32 $r3 $r1 $r2 // $r3 = low 32 bits of (-b * z)
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 1 of 5: presumably z += high 32 bits of (z * $r3)
+ imul u32 u32 $r3 $r1 $r2 // recompute $r3 = -b * z with the refined z
+ sched 0x7e0 0x7e0 0x7e0
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 2 of 5
+ imul u32 u32 $r3 $r1 $r2
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 3 of 5
+ sched 0x7e0 0x7e0 0x7e0
+ imul u32 u32 $r3 $r1 $r2
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 4 of 5
+ imul u32 u32 $r3 $r1 $r2
+ sched 0x7e0 0x7e0 0x7e0
+ imad u32 u32 hi $r2 $r2 $r3 $r2 // refinement 5 of 5: $r2 now holds the final z
+ mov $r3 $r0 0xf // keep a copy of |a| in $r3
+ imul u32 u32 hi $r0 $r0 $r2 // quotient estimate q = high 32 bits of (|a| * z)
+ sched 0x7e0 0x7e0 0x7e0
+ i2i u32 u32 $r2 neg $r1 // $r2 = -(-b) = |b|
+ imad u32 u32 $r1 $r1 $r0 $r3 // remainder r = (-|b|) * q + |a|
+ isetp ge u32 and $p0 1 $r1 $r2 1 // $p0 = (r >= |b|): the estimate was one too small
+ sched 0x7e0 0x7e0 0x7e0
+ $p0 iadd $r1 $r1 neg $r2 // first correction: r -= |b|
+ $p0 iadd $r0 $r0 0x1 // first correction: q += 1
+ $p0 isetp ge u32 and $p0 1 $r1 $r2 1 // re-test r >= |b|, only if the first correction ran
+ sched 0x7e0 0x7e0 0x7e0
+ $p0 iadd $r1 $r1 neg $r2 // second correction: r -= |b|
+ $p0 iadd $r0 $r0 0x1 // second correction: q += 1
+ $p3 i2i s32 s32 $r0 neg $r0 // negate the quotient when the input signs differed
+ sched 0x7e0 0x7e0 0x7e0
+ $p2 i2i s32 s32 $r1 neg $r1 // negate the remainder when the dividend was negative
+ ret
+ nop 0 // fills the last slot of the final three-instruction group
+
+// STUB
+gm107_rcp_f64: // shares the single body below with gm107_rsq_f64
+gm107_rsq_f64: // both labels mark the same address
+ sched 0x7e0 0x7e0 0x7e0 // placeholder sched word for the three-instruction group that follows
+ ret // returns immediately; no registers are written before the return
+ nop 0 // fills the rest of the three-instruction group
+ nop 0
+
+.section #gm107_builtin_offsets // one 64-bit entry per builtin, holding that label's address
+.b64 #gm107_div_u32
+.b64 #gm107_div_s32
+.b64 #gm107_rcp_f64 // same address as the gm107_rsq_f64 entry: both labels mark the shared stub
+.b64 #gm107_rsq_f64
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h
new file mode 100644
index 00000000000..7be25da5532
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h
@@ -0,0 +1,97 @@
+uint64_t gm107_builtin_code[] = {
+/* 0x0000: gm107_div_u32 */
+ 0x001f8000fc0007e0,
+ 0x5c30000000170002,
+ 0x3847040001f70202,
+ 0x3898078000170003,
+ 0x001f8000fc0007e0,
+ 0x5c48000000270302,
+ 0x5ce0200000170a01,
+ 0x5c38000000270103,
+ 0x001f8000fc0007e0,
+ 0x5a40010000370202,
+ 0x5c38000000270103,
+ 0x5a40010000370202,
+ 0x001f8000fc0007e0,
+ 0x5c38000000270103,
+ 0x5a40010000370202,
+ 0x5c38000000270103,
+ 0x001f8000fc0007e0,
+ 0x5a40010000370202,
+ 0x5c38000000270103,
+ 0x5a40010000370202,
+ 0x001f8000fc0007e0,
+ 0x5c98078000070003,
+ 0x5c38008000270000,
+ 0x5ce0200000170a02,
+ 0x001f8000fc0007e0,
+ 0x5a00018000070101,
+ 0x5b6c038000270107,
+ 0x5c11000000200101,
+ 0x001f8000fc0007e0,
+ 0x3810000000100000,
+ 0x5b6c038000200107,
+ 0x5c11000000200101,
+ 0x001f8000fc0007e0,
+ 0x3810000000100000,
+ 0xe32000000007000f,
+ 0x50b0000000070f00,
+/* 0x0120: gm107_div_s32 */
+ 0x001f8000fc0007e0,
+ 0x5b6303800ff70017,
+ 0x5b6341000ff7011f,
+ 0x5ce2000000073a00,
+ 0x001f8000fc0007e0,
+ 0x5ce2000000173a01,
+ 0x5c30000000170002,
+ 0x3847040001f70202,
+ 0x001f8000fc0007e0,
+ 0x3898078000170003,
+ 0x5c48000000270302,
+ 0x5ce0200000170a01,
+ 0x001f8000fc0007e0,
+ 0x5c38000000270103,
+ 0x5a40010000370202,
+ 0x5c38000000270103,
+ 0x001f8000fc0007e0,
+ 0x5a40010000370202,
+ 0x5c38000000270103,
+ 0x5a40010000370202,
+ 0x001f8000fc0007e0,
+ 0x5c38000000270103,
+ 0x5a40010000370202,
+ 0x5c38000000270103,
+ 0x001f8000fc0007e0,
+ 0x5a40010000370202,
+ 0x5c98078000070003,
+ 0x5c38008000270000,
+ 0x001f8000fc0007e0,
+ 0x5ce0200000170a02,
+ 0x5a00018000070101,
+ 0x5b6c038000270107,
+ 0x001f8000fc0007e0,
+ 0x5c11000000200101,
+ 0x3810000000100000,
+ 0x5b6c038000200107,
+ 0x001f8000fc0007e0,
+ 0x5c11000000200101,
+ 0x3810000000100000,
+ 0x5ce0200000033a00,
+ 0x001f8000fc0007e0,
+ 0x5ce0200000123a01,
+ 0xe32000000007000f,
+ 0x50b0000000070f00,
+/* 0x0280: gm107_rcp_f64 */
+/* 0x0280: gm107_rsq_f64 */
+ 0x001f8000fc0007e0,
+ 0xe32000000007000f,
+ 0x50b0000000070f00,
+ 0x50b0000000070f00,
+};
+
+uint64_t gm107_builtin_offsets[] = {
+ 0x0000000000000000,
+ 0x0000000000000120,
+ 0x0000000000000280,
+ 0x0000000000000280,
+};