SPARC normal tnl optimizations.

author: davem69 <davem69> 2001-06-06 11:46:04 +0000
committer: davem69 <davem69> 2001-06-06 11:46:04 +0000
commit: 6f365c21d796310a9ea70d8420e6879eb5abb6ae (patch)
tree: a17d087d3affc09de3caf515b5a05b063ad72dfa /src/mesa/sparc
parent: 775355a88a0927e2e3a855036c26950397a61d7b (diff)
3 files changed, 644 insertions, 2 deletions
diff --git a/src/mesa/sparc/norm.S b/src/mesa/sparc/norm.S
new file mode 100644
index 00000000000..8ec6334d708
--- /dev/null
+++ b/src/mesa/sparc/norm.S
@@ -0,0 +1,603 @@
+/* $Id: norm.S,v 1.1 2001/06/06 11:46:04 davem69 Exp $ */
+
+#include "sparc_matrix.h"
+
+	.text
+
+#ifdef __sparc_v9__
+#define STACK_VAR_OFF	(2047 + (8 * 16))
+#else
+#define STACK_VAR_OFF	(4 * 16)
+#endif
+
+	/* Newton-Raphson approximation turns out to be slower
+	 * (and less accurate) than direct fsqrts/fdivs.
+	 */
+#define ONE_DOT_ZERO	0x3f800000
+
+	/* _mesa_sparc_transform_normalize_normals
+	 *
+	 * Leaf routine: it runs in the caller's register window and
+	 * returns with retl.  For each of in->count normals it multiplies
+	 * the input by elements 0,1,2,4,5,6,8,9,10 of mat->inv, then
+	 *   - lengths == NULL: divides the result by its own length;
+	 *   - lengths != NULL: multiplies the nine matrix elements by
+	 *     'scale' once up front, then multiplies each transformed
+	 *     normal by lengths[i].
+	 * Results are stored 12 bytes apart starting at dest->start and
+	 * dest->count is set to in->count.
+	 *
+	 * NOTE(review): assumes the float 'scale' argument is passed in
+	 * integer register %o1 -- TODO confirm for every ABI this file is
+	 * built for.
+	 */
+	.globl	_mesa_sparc_transform_normalize_normals
+_mesa_sparc_transform_normalize_normals:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	/* Integer -> FP register transfer goes through memory: the bit
+	 * patterns are stored to a 16-byte stack temporary as integers
+	 * and reloaded as single-precision floats.
+	 */
+	sethi	%hi(ONE_DOT_ZERO), %g2
+	sub	%sp, 16, %sp
+	st	%g2, [%sp + STACK_VAR_OFF+0x0]
+	st	%o1, [%sp + STACK_VAR_OFF+0x4]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
+	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Return immediately when count < 1 (signed compare).  The delay
+	 * slot of the first branch already tests lengths against NULL for
+	 * the second branch; the delay slot of the second branch zeroes
+	 * %o4, which is free now that dest->count has been stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 cmp	%o3, 0
+	bne	4f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+1:	/* LENGTHS == NULL */
+	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+	 */
+	/* %f0 and %f4 are recycled as product temporaries below once
+	 * their previous values have been consumed.
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	fmuls	%f1, M1, %f4			! FGM	Group
+	fmuls	%f0, M4, %f5			! FGM	Group
+	fmuls	%f1, M5, %f6			! FGM	Group
+	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
+	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
+	fadds	%f3, %f4, %f3			! FGA
+	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
+	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
+	fadds	%f5, %f6, %f5			! FGA
+	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
+	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
+	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
+	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
+	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
+	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
+	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
+	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
+	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available
+
+	/* scale (f6) = 1.0 / sqrt(len) */
+	/* NOTE(review): len is not checked against zero before the
+	 * divide, so a zero-length transformed normal presumably does not
+	 * produce a finite scale -- confirm callers never pass one, or
+	 * that the C reference path behaves the same way.
+	 */
+	fsqrts	%f6, %f6			! FDIV  20 cycles
+	fdivs	%f12, %f6, %f6			! FDIV	14 cycles
+
+	fmuls	%f3, %f6, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
+	fmuls	%f5, %f6, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
+	fmuls	%f7, %f6, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+	ba	7f
+	 nop
+
+4:	/* LENGTHS != NULL */
+	/* Fold 'scale' into the nine matrix elements once, so the loop
+	 * only has to apply the per-normal lengths[i] factor.
+	 */
+	fmuls	M0, %f15, M0
+	fmuls	M1, %f15, M1
+	fmuls	M2, %f15, M2
+	fmuls	M4, %f15, M4
+	fmuls	M5, %f15, M5
+	fmuls	M6, %f15, M6
+	fmuls	M8, %f15, M8
+	fmuls	M9, %f15, M9
+	fmuls	M10, %f15, M10
+
+5:
+	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	fmuls	%f1, M1, %f4			! FGM	Group
+	fmuls	%f0, M4, %f5			! FGM	Group
+	fmuls	%f1, M5, %f6			! FGM	Group
+	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
+	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
+	fadds	%f3, %f4, %f3			! FGA
+	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
+	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
+	fadds	%f5, %f6, %f5			! FGA
+	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
+	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
+	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
+	ld	[%o3], %f13			! LSU
+	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
+	add	%o3, 4, %o3			! IEU0
+	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
+
+	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+	fmuls	%f3, %f13, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
+	fmuls	%f5, %f13, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
+	fmuls	%f7, %f13, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	5b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+	
+	/* _mesa_sparc_transform_normalize_normals_no_rot
+	 *
+	 * Same contract as the full transform+normalize routine, but only
+	 * matrix elements 0, 5 and 10 of mat->inv are loaded and used, so
+	 * each component is multiplied by a single matrix element:
+	 *   - lengths == NULL: the result is divided by its own length;
+	 *   - lengths != NULL: the three elements are multiplied by
+	 *     'scale' once, then each result is multiplied by lengths[i].
+	 * Leaf routine (returns with retl); sets dest->count = in->count.
+	 *
+	 * NOTE(review): assumes the float 'scale' argument is passed in
+	 * integer register %o1 -- TODO confirm for every ABI this file is
+	 * built for.
+	 */
+	.globl	_mesa_sparc_transform_normalize_normals_no_rot
+_mesa_sparc_transform_normalize_normals_no_rot:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	/* Move the 1.0f bit pattern and 'scale' into FP registers via a
+	 * 16-byte stack temporary (store as integer, reload as float).
+	 */
+	sethi	%hi(ONE_DOT_ZERO), %g2
+	sub	%sp, 16, %sp
+	st	%g2, [%sp + STACK_VAR_OFF+0x0]
+	st	%o1, [%sp + STACK_VAR_OFF+0x4]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
+	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	LDMATRIX_0_5_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Nothing to do when count < 1 (signed compare).  The two delay
+	 * slots test lengths against NULL and zero the loop index; %o4 is
+	 * free because dest->count has already been stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 cmp	%o3, 0
+	bne	4f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+1:	/* LENGTHS == NULL */
+	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	fmuls	%f1, M5, %f5			! FGM	Group
+	fmuls	%f2, M10, %f7			! FGM	Group
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+	fmuls	%f3, %f3, %f6			! FGM	Group	stall, f3 available
+	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
+	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
+	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
+	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available
+
+	/* scale (f6) = 1.0 / sqrt(len) */
+	/* NOTE(review): len is not checked against zero before the
+	 * divide -- confirm zero-length results cannot occur, or that
+	 * the C reference path behaves the same way.
+	 */
+	fsqrts	%f6, %f6			! FDIV  20 cycles
+	fdivs	%f12, %f6, %f6			! FDIV	14 cycles
+
+	fmuls	%f3, %f6, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
+	fmuls	%f5, %f6, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
+	fmuls	%f7, %f6, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+	ba	7f
+	 nop
+
+4:	/* LENGTHS != NULL */
+	/* Fold 'scale' into the three matrix elements once, outside
+	 * the loop.
+	 */
+	fmuls	M0, %f15, M0
+	fmuls	M5, %f15, M5
+	fmuls	M10, %f15, M10
+
+5:
+	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	ld	[%o3], %f13			! LSU
+	fmuls	%f1, M5, %f5			! FGM	Group
+	add	%o3, 4, %o3			! IEU0
+	fmuls	%f2, M10, %f7			! FGM	Group
+
+	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+	fmuls	%f3, %f13, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
+	fmuls	%f5, %f13, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
+	fmuls	%f7, %f13, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	5b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+
+	/* _mesa_sparc_transform_rescale_normals_no_rot
+	 *
+	 * For each of in->count normals, writes
+	 *   out[i] = (ux * m0 * scale, uy * m5 * scale, uz * m10 * scale)
+	 * where m0, m5, m10 are elements 0, 5 and 10 of mat->inv.  The
+	 * scale is folded into the three matrix elements once before the
+	 * loop.  The 'lengths' argument (%o3) is never read.  Leaf
+	 * routine (returns with retl); sets dest->count = in->count.
+	 *
+	 * NOTE(review): assumes the float 'scale' argument is passed in
+	 * integer register %o1 -- TODO confirm for every ABI this file is
+	 * built for.
+	 */
+	.globl	_mesa_sparc_transform_rescale_normals_no_rot
+_mesa_sparc_transform_rescale_normals_no_rot:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+	/* Bounce 'scale' through a stack temporary to get it into %f15. */
+	sub	%sp, 16, %sp
+	st	%o1, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	LDMATRIX_0_5_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Return when count < 1 (signed compare); the delay slot reuses
+	 * %o4 as the loop index now that dest->count is stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+	/* Pre-scale the matrix elements so the loop is one multiply per
+	 * component.
+	 */
+	fmuls	M0, %f15, M0
+	fmuls	M5, %f15, M5
+	fmuls	M10, %f15, M10
+
+1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	st	%f3, [%g3 + 0x00]		! LSU
+	fmuls	%f1, M5, %f5			! FGM	Group
+	st	%f5, [%g3 + 0x04]		! LSU
+	fmuls	%f2, M10, %f7			! FGM	Group
+	st	%f7, [%g3 + 0x08]		! LSU
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+
+	/* _mesa_sparc_transform_rescale_normals
+	 *
+	 * For each of in->count normals, multiplies the input by elements
+	 * 0,1,2,4,5,6,8,9,10 of mat->inv after those nine elements have
+	 * been multiplied by 'scale' (done once, before the loop), and
+	 * stores the three results 12 bytes apart starting at dest->start.
+	 * The 'lengths' argument (%o3) is never read.  Leaf routine
+	 * (returns with retl); sets dest->count = in->count.
+	 *
+	 * NOTE(review): assumes the float 'scale' argument is passed in
+	 * integer register %o1 -- TODO confirm for every ABI this file is
+	 * built for.
+	 */
+	.globl	_mesa_sparc_transform_rescale_normals
+_mesa_sparc_transform_rescale_normals:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+	/* Bounce 'scale' through a stack temporary to get it into %f15. */
+	sub	%sp, 16, %sp
+	st	%o1, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Return when count < 1 (signed compare); the delay slot reuses
+	 * %o4 as the loop index now that dest->count is stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+	/* Fold 'scale' into the matrix once, outside the loop. */
+	fmuls	M0, %f15, M0
+	fmuls	M1, %f15, M1
+	fmuls	M2, %f15, M2
+	fmuls	M4, %f15, M4
+	fmuls	M5, %f15, M5
+	fmuls	M6, %f15, M6
+	fmuls	M8, %f15, M8
+	fmuls	M9, %f15, M9
+	fmuls	M10, %f15, M10
+
+1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+	 * Each component is stored as soon as its final add is issued.
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	fmuls	%f1, M1, %f4			! FGM	Group
+	fmuls	%f0, M4, %f5			! FGM	Group
+	fmuls	%f1, M5, %f6			! FGM	Group
+	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
+	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
+	fadds	%f3, %f4, %f3			! FGA
+	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
+	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
+	fadds	%f5, %f6, %f5			! FGA
+	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
+	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
+	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
+	st	%f3, [%g3 + 0x00]		! LSU
+	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
+	st	%f5, [%g3 + 0x04]		! LSU
+	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
+	st	%f7, [%g3 + 0x08]		! LSU
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+
+	/* _mesa_sparc_transform_normals_no_rot
+	 *
+	 * For each of in->count normals, writes
+	 *   out[i] = (ux * m0, uy * m5, uz * m10)
+	 * where m0, m5, m10 are elements 0, 5 and 10 of mat->inv.  The
+	 * 'scale' (%o1) and 'lengths' (%o3) arguments are never read.
+	 * Leaf routine (returns with retl); sets dest->count = in->count.
+	 */
+	.globl	_mesa_sparc_transform_normals_no_rot
+_mesa_sparc_transform_normals_no_rot:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	LDMATRIX_0_5_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Return when count < 1 (signed compare); the delay slot reuses
+	 * %o4 as the loop index now that dest->count is stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	st	%f3, [%g3 + 0x00]		! LSU
+	fmuls	%f1, M5, %f5			! FGM	Group
+	st	%f5, [%g3 + 0x04]		! LSU
+	fmuls	%f2, M10, %f7			! FGM	Group
+	st	%f7, [%g3 + 0x08]		! LSU
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+
+	/* _mesa_sparc_transform_normals
+	 *
+	 * For each of in->count normals, multiplies the input by elements
+	 * 0,1,2,4,5,6,8,9,10 of mat->inv and stores the three results
+	 * 12 bytes apart starting at dest->start.  No normalization or
+	 * scaling is applied: the 'scale' (%o1) and 'lengths' (%o3)
+	 * arguments are never read.  Leaf routine (returns with retl);
+	 * sets dest->count = in->count.
+	 */
+	.globl	_mesa_sparc_transform_normals
+_mesa_sparc_transform_normals:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Return when count < 1 (signed compare); the delay slot reuses
+	 * %o4 as the loop index now that dest->count is stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
+	ld	[%o5 + 0x04], %f1		! uy = from[1]
+	ld	[%o5 + 0x08], %f2		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+	 * Each component is stored as soon as its final add is issued.
+	 */
+	fmuls	%f0, M0, %f3			! FGM	Group
+	fmuls	%f1, M1, %f4			! FGM	Group
+	fmuls	%f0, M4, %f5			! FGM	Group
+	fmuls	%f1, M5, %f6			! FGM	Group
+	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
+	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
+	fadds	%f3, %f4, %f3			! FGA
+	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
+	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
+	fadds	%f5, %f6, %f5			! FGA
+	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
+	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
+	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
+	st	%f3, [%g3 + 0x00]		! LSU
+	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
+	st	%f5, [%g3 + 0x04]		! LSU
+	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
+	st	%f7, [%g3 + 0x08]		! LSU
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+
+	/* _mesa_sparc_normalize_normals
+	 *
+	 * Normalizes in->count normals without any matrix transform; the
+	 * 'mat' (%o0) and 'scale' (%o1) arguments are never read.
+	 *   - lengths == NULL: each input is divided by its own length;
+	 *   - lengths != NULL: each input is multiplied by lengths[i].
+	 * Results are stored 12 bytes apart starting at dest->start.
+	 * Leaf routine (returns with retl); sets dest->count = in->count.
+	 */
+	.globl	_mesa_sparc_normalize_normals
+_mesa_sparc_normalize_normals:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	/* Materialize 1.0f in %f12 by storing its bit pattern to a
+	 * stack temporary and reloading it as a float.
+	 */
+	sethi	%hi(ONE_DOT_ZERO), %g2
+	sub	%sp, 16, %sp
+	st	%g2, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
+	add	%sp, 16, %sp
+
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Nothing to do when count < 1 (signed compare).  The two delay
+	 * slots test lengths against NULL and zero the loop index; %o4 is
+	 * free because dest->count has already been stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 cmp	%o3, 0
+	bne	4f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+1:	/* LENGTHS == NULL */
+	ld	[%o5 + 0x00], %f3		! ux = from[0]
+	ld	[%o5 + 0x04], %f5		! uy = from[1]
+	ld	[%o5 + 0x08], %f7		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
+	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
+	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
+	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
+	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available
+
+	/* scale (f6) = 1.0 / sqrt(len) */
+	/* NOTE(review): len is not checked against zero before the
+	 * divide -- confirm zero-length inputs cannot occur, or that
+	 * the C reference path behaves the same way.
+	 */
+	fsqrts	%f6, %f6			! FDIV  20 cycles
+	fdivs	%f12, %f6, %f6			! FDIV	14 cycles
+
+	fmuls	%f3, %f6, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
+	fmuls	%f5, %f6, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
+	fmuls	%f7, %f6, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+	ba	7f
+	 nop
+
+4:	/* LENGTHS != NULL */
+
+5:
+	ld	[%o5 + 0x00], %f3		! ux = from[0]
+	ld	[%o5 + 0x04], %f5		! uy = from[1]
+	ld	[%o5 + 0x08], %f7		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	ld	[%o3], %f13			! LSU
+	add	%o3, 4, %o3			! IEU0
+
+	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+	fmuls	%f3, %f13, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
+	fmuls	%f5, %f13, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
+	fmuls	%f7, %f13, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	5b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
+
+	/* _mesa_sparc_rescale_normals
+	 *
+	 * For each of in->count normals, writes out[i] = in[i] * scale
+	 * (all three components), 12 bytes apart starting at dest->start.
+	 * No matrix is applied: the 'mat' (%o0) and 'lengths' (%o3)
+	 * arguments are never read.  Leaf routine (returns with retl);
+	 * sets dest->count = in->count.
+	 *
+	 * NOTE(review): assumes the float 'scale' argument is passed in
+	 * integer register %o1 -- TODO confirm for every ABI this file is
+	 * built for.
+	 */
+	.globl	_mesa_sparc_rescale_normals
+_mesa_sparc_rescale_normals:
+	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	/* Bounce 'scale' through a 16-byte stack temporary to get its
+	 * bit pattern into %f15.  (This routine needs no 1.0f constant,
+	 * so nothing else is staged here.)
+	 */
+	sub	%sp, 16, %sp
+	st	%o1, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	/* Return when count < 1 (signed compare); the delay slot reuses
+	 * %o4 as the loop index now that dest->count is stored.
+	 */
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4				! 'i' for STRIDE_LOOP
+
+1:
+	ld	[%o5 + 0x00], %f3		! ux = from[0]
+	ld	[%o5 + 0x04], %f5		! uy = from[1]
+	ld	[%o5 + 0x08], %f7		! uz = from[2]
+	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
+	add	%o4, 1, %o4			! i++
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	fmuls	%f3, %f15, %f3
+	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
+	fmuls	%f5, %f15, %f5
+	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
+	fmuls	%f7, %f15, %f7
+	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
+
+	cmp	%o4, %g1			! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3			! advance out vector pointer
+
+7:	retl
+	 nop
diff --git a/src/mesa/sparc/sparc.c b/src/mesa/sparc/sparc.c
index c25a758a73b..45e2cecac18 100644
--- a/src/mesa/sparc/sparc.c
+++ b/src/mesa/sparc/sparc.c
@@ -1,4 +1,4 @@
-/* $Id: sparc.c,v 1.3 2001/06/05 23:54:01 davem69 Exp $ */
+/* $Id: sparc.c,v 1.4 2001/06/06 11:46:04 davem69 Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -86,6 +86,22 @@ extern GLvector4f  *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
 						    GLubyte clipMask[],
 						    GLubyte *orMask,
 						    GLubyte *andMask);
+
+#define NORM_ARGS	const GLmatrix *mat,				\
+			GLfloat scale,					\
+			const GLvector3f *in,				\
+			const GLfloat *lengths,				\
+			GLvector3f *dest
+
+extern void _mesa_sparc_transform_normalize_normals(NORM_ARGS);
+extern void _mesa_sparc_transform_normalize_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_rescale_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_rescale_normals(NORM_ARGS);
+extern void _mesa_sparc_transform_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_normals(NORM_ARGS);
+extern void _mesa_sparc_normalize_normals(NORM_ARGS);
+extern void _mesa_sparc_rescale_normals(NORM_ARGS);
+
 #endif
 
 void _mesa_init_all_sparc_transform_asm(void)
@@ -99,9 +115,27 @@ void _mesa_init_all_sparc_transform_asm(void)
    _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
    _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;
 
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] =
+	   _mesa_sparc_transform_normalize_normals;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] =
+	   _mesa_sparc_transform_normalize_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
+	   _mesa_sparc_transform_rescale_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
+	   _mesa_sparc_transform_rescale_normals;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
+	   _mesa_sparc_transform_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM] =
+	   _mesa_sparc_transform_normals;
+   _mesa_normal_tab[NORM_NORMALIZE] =
+	   _mesa_sparc_normalize_normals;
+   _mesa_normal_tab[NORM_RESCALE] =
+	   _mesa_sparc_rescale_normals;
+
 #ifdef DEBUG
    _math_test_all_transform_functions("sparc");
    _math_test_all_cliptest_functions("sparc");
+   _math_test_all_normal_transform_functions("sparc");
 #endif
 
 #endif
diff --git a/src/mesa/sparc/sparc_matrix.h b/src/mesa/sparc/sparc_matrix.h
index 85beef9d927..4fe09e83eaf 100644
--- a/src/mesa/sparc/sparc_matrix.h
+++ b/src/mesa/sparc/sparc_matrix.h
@@ -1,4 +1,4 @@
-/* $Id: sparc_matrix.h,v 1.2 2001/06/05 23:54:01 davem69 Exp $ */
+/* $Id: sparc_matrix.h,v 1.3 2001/06/06 11:46:04 davem69 Exp $ */
 
 #ifndef _SPARC_MATRIX_H
 #define _SPARC_MATRIX_H
@@ -146,6 +146,11 @@
 	ldd	[BASE + (12 * 0x4)], M12;	\
 	ld	[BASE + (14 * 0x4)], M14
 
+/* Load only matrix elements 0, 5 and 10 (4-byte floats at BASE) into
+ * M0, M5 and M10.  The final line deliberately has no trailing ';' or
+ * line continuation, so the definition ends here regardless of what
+ * follows it in the file.
+ */
+#define LDMATRIX_0_5_10(BASE) 			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (10 * 0x4)], M10
+
 #define LDMATRIX_0_5_10_12_13_14(BASE) 		\
 	ld	[BASE + ( 0 * 0x4)], M0;	\
 	ld	[BASE + ( 5 * 0x4)], M5;	\
author	davem69 <davem69>	2001-06-06 11:46:04 +0000
committer	davem69 <davem69>	2001-06-06 11:46:04 +0000
commit	6f365c21d796310a9ea70d8420e6879eb5abb6ae (patch)
tree	a17d087d3affc09de3caf515b5a05b063ad72dfa /src/mesa/sparc
parent	775355a88a0927e2e3a855036c26950397a61d7b (diff)