diff options
author | Ben Skeggs <[email protected]> | 2014-05-09 15:56:05 +1000 |
---|---|---|
committer | Ben Skeggs <[email protected]> | 2014-05-15 09:54:49 +1000 |
commit | d548d47edf9f05e6dbf9656abc2f8e78d02cb2f6 (patch) | |
tree | 9427f4339be135386d41b160c1e1e38b7e80eb56 /src/gallium/drivers/nouveau/codegen/lib | |
parent | 7b9475fa652b9df6d599edbea8fa5049fdd995e1 (diff) |
nvc0: add maxwell (sm50) compiler backend
The big missing part here is proper sched data calculations, but
hopefully the chosen placeholder will be sufficient for now.
Passes piglit as well as GK107 does.
Signed-off-by: Ben Skeggs <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/lib')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/lib/Makefile | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/lib/gm107.asm | 115 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h | 97 |
3 files changed, 215 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/lib/Makefile b/src/gallium/drivers/nouveau/codegen/lib/Makefile index 28a41a3f41e..06d1979d8b2 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/Makefile +++ b/src/gallium/drivers/nouveau/codegen/lib/Makefile @@ -1,6 +1,6 @@ ENVYAS ?= envyas -all: gf100.asm.h gk104.asm.h gk110.asm.h +all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h gf100.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@ @@ -8,3 +8,5 @@ gk104.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@ gk110.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mgk110 $< -o $@ +gm107.asm.h: %.asm.h: %.asm + $(ENVYAS) -a -W -mgm107 $< -o $@ diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm new file mode 100644 index 00000000000..758cc81a159 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm @@ -0,0 +1,115 @@ +.section #gm107_builtin_code +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 22 / 14 * 8 bytes +// +gm107_div_u32: + sched 0x7e0 0x7e0 0x7e0 + flo u32 $r2 $r1 + lop xor 1 $r2 $r2 0x1f + mov $r3 0x1 0xf + sched 0x7e0 0x7e0 0x7e0 + shl $r2 $r3 $r2 + i2i u32 u32 $r1 neg $r1 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + mov $r3 $r0 0xf + imul u32 u32 hi $r0 $r0 $r2 + i2i u32 u32 $r2 neg $r1 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 $r1 $r1 $r0 $r3 + isetp ge u32 and $p0 1 $r1 $r2 1 + $p0 iadd $r1 $r1 neg $r2 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r0 $r0 0x1 + $p0 isetp ge u32 and $p0 1 $r1 $r2 1 + $p0 iadd $r1 $r1 neg $r2 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r0 $r0 0x1 + ret + nop 0 + +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +gm107_div_s32: + sched 0x7e0 0x7e0 0x7e0 + isetp lt and $p2 0x1 $r0 0 1 + isetp lt xor $p3 1 $r1 0 $p2 + i2i s32 s32 $r0 abs $r0 + sched 0x7e0 0x7e0 0x7e0 + i2i s32 s32 $r1 abs $r1 + flo u32 $r2 $r1 + lop xor 1 $r2 $r2 0x1f + sched 0x7e0 0x7e0 0x7e0 + mov $r3 0x1 0xf + shl $r2 $r3 $r2 + i2i u32 u32 $r1 neg $r1 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + mov $r3 $r0 0xf + imul u32 u32 hi $r0 $r0 $r2 + sched 0x7e0 0x7e0 0x7e0 + i2i u32 u32 $r2 neg $r1 + imad u32 u32 $r1 $r1 $r0 $r3 + isetp ge u32 and $p0 1 $r1 $r2 1 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r1 $r1 neg $r2 + $p0 iadd $r0 $r0 0x1 + $p0 isetp ge u32 and $p0 1 $r1 $r2 1 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r1 $r1 neg $r2 + $p0 iadd $r0 $r0 0x1 + $p3 i2i s32 s32 $r0 neg $r0 + sched 0x7e0 0x7e0 0x7e0 + $p2 i2i s32 s32 $r1 neg $r1 + ret + nop 0 + +// STUB +gm107_rcp_f64: +gm107_rsq_f64: + sched 0x7e0 0x7e0 0x7e0 + ret + nop 0 + nop 0 + +.section #gm107_builtin_offsets +.b64 #gm107_div_u32 +.b64 #gm107_div_s32 +.b64 #gm107_rcp_f64 +.b64 #gm107_rsq_f64 diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h new file mode 100644 index 00000000000..7be25da5532 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h @@ -0,0 +1,97 @@ +uint64_t gm107_builtin_code[] = { +/* 0x0000: gm107_div_u32 */ + 0x001f8000fc0007e0, + 0x5c30000000170002, + 0x3847040001f70202, + 0x3898078000170003, + 0x001f8000fc0007e0, + 0x5c48000000270302, + 0x5ce0200000170a01, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c98078000070003, + 0x5c38008000270000, + 0x5ce0200000170a02, + 0x001f8000fc0007e0, + 0x5a00018000070101, + 0x5b6c038000270107, + 0x5c11000000200101, + 0x001f8000fc0007e0, + 0x3810000000100000, + 0x5b6c038000200107, + 0x5c11000000200101, + 0x001f8000fc0007e0, + 0x3810000000100000, + 0xe32000000007000f, + 0x50b0000000070f00, +/* 0x0120: gm107_div_s32 */ + 0x001f8000fc0007e0, + 0x5b6303800ff70017, + 0x5b6341000ff7011f, + 0x5ce2000000073a00, + 0x001f8000fc0007e0, + 0x5ce2000000173a01, + 0x5c30000000170002, + 0x3847040001f70202, + 0x001f8000fc0007e0, + 0x3898078000170003, + 0x5c48000000270302, + 0x5ce0200000170a01, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c98078000070003, + 0x5c38008000270000, + 0x001f8000fc0007e0, + 0x5ce0200000170a02, + 0x5a00018000070101, + 0x5b6c038000270107, + 0x001f8000fc0007e0, + 0x5c11000000200101, + 0x3810000000100000, + 0x5b6c038000200107, + 0x001f8000fc0007e0, + 0x5c11000000200101, + 0x3810000000100000, + 0x5ce0200000033a00, + 0x001f8000fc0007e0, + 0x5ce0200000123a01, + 0xe32000000007000f, + 0x50b0000000070f00, +/* 0x0280: gm107_rcp_f64 */ +/* 0x0280: gm107_rsq_f64 */ + 0x001f8000fc0007e0, + 0xe32000000007000f, + 0x50b0000000070f00, + 0x50b0000000070f00, +}; + +uint64_t gm107_builtin_offsets[] = { + 0x0000000000000000, + 0x0000000000000120, + 0x0000000000000280, + 0x0000000000000280, +}; |