summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
diff options
context:
space:
mode:
author Ben Skeggs <[email protected]> 2014-05-09 15:55:47 +1000
committer Ben Skeggs <[email protected]> 2014-05-15 09:54:12 +1000
commit 0079a375a58b288caacc2721f5a34b8f1233e7d1 (patch)
tree 4d7b244b3cb826e9cbccf090fe549fea51351736 /src/gallium/drivers/nouveau/codegen/lib/gk110.asm
parent 737477dac33d68b00b34019258d663945fbfeb56 (diff)
nvc0: allow for easier modification of compiler library routines
Signed-off-by: Ben Skeggs <[email protected]> Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/lib/gk110.asm')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/lib/gk110.asm 98
1 files changed, 98 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/lib/gk110.asm b/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
new file mode 100644
index 00000000000..be17871edd4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
@@ -0,0 +1,98 @@
+.section #gk110_builtin_code
+// DIV U32: unsigned 32-bit quotient and modulus via a reciprocal estimate
+//
+// UNR recurrence (q = a / b):
+// look for z such that 2^32 - b <= b * z < 2^32
+// then q - 1 <= (a * z) / 2^32 <= q
+// z starts as a power of two; five steps of z += hi32(z * lo32(-b * z)) follow
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p1 (NOTE(review): only $p0 is written below)
+// SIZE: 22 / 14 * 8 bytes (NOTE(review): body is 26 insns + 4 sched; confirm)
+// NOTE(review): there is no explicit zero-divisor check in this routine.
+gk110_div_u32:
+ sched 0x28282804280428                     // 7-byte word; 7 instructions follow (presumably 1 byte each - confirm)
+ bfind u32 $r2 $r1                          // $r2 = index of top set bit of b (assumed bfind semantics - confirm)
+ xor b32 $r2 $r2 0x1f                       // $r2 = 31 - $r2 for values in 0..31
+ mov b32 $r3 0x1                            // $r3 = 1, the value to be shifted
+ shl b32 $r2 $r3 clamp $r2                  // z = 1 << $r2 (shift carries the clamp modifier): initial estimate
+ cvt u32 $r1 neg u32 $r1                    // $r1 = -b, kept negated for the whole refinement
+ mul $r3 u32 $r1 u32 $r2                    // step 1: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 1: z += hi32(z * $r3)
+ sched 0x28282828282828                     // scheduling word for the next 7 instructions
+ mul $r3 u32 $r1 u32 $r2                    // step 2: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 2: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 3: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 3: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 4: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 4: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 5: $r3 = lo32(-b * z)
+ sched 0x042c2828042804                     // scheduling word for the next 7 instructions
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 5: z += hi32(z * $r3); z is final
+ mov b32 $r3 $r0                            // $r3 = a; $r0 is about to be overwritten with q
+ mul high $r0 u32 $r0 u32 $r2               // q = hi32(a * z)
+ cvt u32 $r2 neg u32 $r1                    // $r2 = b again (negation of the stored -b)
+ add $r1 (mul u32 $r1 u32 $r0) $r3          // r = (-b) * q + a = a - b * q
+ set $p0 0x1 ge u32 $r1 $r2                 // $p0 = (r >= b), unsigned compare
+ $p0 sub b32 $r1 $r1 $r2                    // first correction: r -= b
+ sched 0x20282e20042c28                     // only 5 instructions follow here; NOTE(review): the other 2 slots presumably cover the start of gk110_div_s32 - confirm before moving routines
+ $p0 add b32 $r0 $r0 0x1                    // first correction: q += 1
+ $p0 set $p0 0x1 ge u32 $r1 $r2             // re-test r >= b only if the first correction ran; otherwise $p0 stays false
+ $p0 sub b32 $r1 $r1 $r2                    // second correction: r -= b
+ $p0 add b32 $r0 $r0 0x1                    // second correction: q += 1
+ ret                                        // $r0 = quotient, $r1 = modulus
+
+// DIV S32, like DIV U32 after taking ABS(inputs)
+// Result is negated if the input signs differ; modulus if the dividend is < 0.
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p3 (NOTE(review): $p1 is not written below)
+// NOTE(review): no sched precedes the first two instructions; see first sched.
+gk110_div_s32:
+ set $p2 0x1 lt s32 $r0 0x0                 // $p2 = (a < 0): sign applied to the modulus at the end
+ set $p3 0x1 lt s32 $r1 0x0 xor $p2         // $p3 = (b < 0) xor $p2: set when the input signs differ
+ sched 0x28042804282820                     // first sched of this routine; NOTE(review): the two instructions above presumably use slots of the previous routine's last sched - confirm
+ cvt s32 $r0 abs s32 $r0                    // a = |a|
+ cvt s32 $r1 abs s32 $r1                    // b = |b|
+ bfind u32 $r2 $r1                          // $r2 = index of top set bit of b (assumed bfind semantics - confirm)
+ xor b32 $r2 $r2 0x1f                       // $r2 = 31 - $r2 for values in 0..31
+ mov b32 $r3 0x1                            // $r3 = 1, the value to be shifted
+ shl b32 $r2 $r3 clamp $r2                  // z = 1 << $r2 (shift carries the clamp modifier): initial estimate
+ cvt u32 $r1 neg u32 $r1                    // $r1 = -b, kept negated for the whole refinement
+ sched 0x28282828282828                     // scheduling word for the next 7 instructions
+ mul $r3 u32 $r1 u32 $r2                    // step 1: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 1: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 2: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 2: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 3: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 3: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 4: $r3 = lo32(-b * z)
+ sched 0x28280428042828                     // scheduling word for the next 7 instructions
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 4: z += hi32(z * $r3)
+ mul $r3 u32 $r1 u32 $r2                    // step 5: $r3 = lo32(-b * z)
+ add $r2 (mul high u32 $r2 u32 $r3) $r2     // step 5: z += hi32(z * $r3); z is final
+ mov b32 $r3 $r0                            // $r3 = |a|; $r0 is about to be overwritten with q
+ mul high $r0 u32 $r0 u32 $r2               // q = hi32(|a| * z)
+ cvt u32 $r2 neg u32 $r1                    // $r2 = |b| again (negation of the stored -b)
+ add $r1 (mul u32 $r1 u32 $r0) $r3          // r = (-b) * q + |a| = |a| - |b| * q
+ sched 0x2028042c28042c                     // scheduling word for the next 7 instructions
+ set $p0 0x1 ge u32 $r1 $r2                 // $p0 = (r >= |b|), unsigned compare
+ $p0 sub b32 $r1 $r1 $r2                    // first correction: r -= |b|
+ $p0 add b32 $r0 $r0 0x1                    // first correction: q += 1
+ $p0 set $p0 0x1 ge u32 $r1 $r2             // re-test only if the first correction ran; otherwise $p0 stays false
+ $p0 sub b32 $r1 $r1 $r2                    // second correction: r -= |b|
+ $p0 add b32 $r0 $r0 0x1                    // second correction: q += 1
+ $p3 cvt s32 $r0 neg s32 $r0                // negate the quotient when the input signs differed
+ sched 0x2c200428042e04                     // last sched in this section; only 3 instructions follow it (2 here + the f64 stub ret)
+ $p2 cvt s32 $r1 neg s32 $r1                // negate the modulus when the dividend was negative
+ ret                                        // $r0 = quotient, $r1 = modulus
+
+gk110_rcp_f64:                              // stub: shares the single ret below with gk110_rsq_f64
+gk110_rsq_f64:                              // stub: same address as gk110_rcp_f64
+ ret                                        // returns immediately; no register is written by either entry point
+
+.section #gk110_builtin_offsets             // table of one 64-bit entry per builtin label; NOTE(review): entry order presumably must match the consumer's builtin indices - confirm
+.b64 #gk110_div_u32                         // entry 0: unsigned 32-bit divide
+.b64 #gk110_div_s32                         // entry 1: signed 32-bit divide
+.b64 #gk110_rcp_f64                         // entry 2: f64 reciprocal stub
+.b64 #gk110_rsq_f64                         // entry 3: f64 reciprocal-square-root stub (same label address as entry 2)