From 6f365c21d796310a9ea70d8420e6879eb5abb6ae Mon Sep 17 00:00:00 2001
From: davem69
Date: Wed, 6 Jun 2001 11:46:04 +0000
Subject: SPARC normal tnl optimizations.

---
 src/mesa/sparc/norm.S         | 603 ++++++++++++++++++++++++++++++++++++++++++
 src/mesa/sparc/sparc.c        |  36 ++-
 src/mesa/sparc/sparc_matrix.h |   7 +-
 3 files changed, 644 insertions(+), 2 deletions(-)
 create mode 100644 src/mesa/sparc/norm.S

(limited to 'src/mesa/sparc')

diff --git a/src/mesa/sparc/norm.S b/src/mesa/sparc/norm.S
new file mode 100644
index 00000000000..8ec6334d708
--- /dev/null
+++ b/src/mesa/sparc/norm.S
@@ -0,0 +1,603 @@
+/* $Id: norm.S,v 1.1 2001/06/06 11:46:04 davem69 Exp $ */
+
+#include "sparc_matrix.h"
+
+	.text
+
+#ifdef __sparc_v9__
+#define STACK_VAR_OFF	(2047 + (8 * 16))	/* NOTE(review): presumably V9 stack bias (2047) + 16 x 8-byte window save area -- confirm against the ABI */
+#else
+#define STACK_VAR_OFF	(4 * 16)	/* NOTE(review): presumably the 16 x 4-byte window save area -- confirm against the ABI */
+#endif
+
+	/* Newton-Raphson approximation turns out to be slower
+	 * (and less accurate) than direct fsqrts/fdivs.
+	 */
+#define ONE_DOT_ZERO	0x3f800000	/* IEEE-754 single-precision bit pattern of 1.0f */
+
+	.globl	_mesa_sparc_transform_normalize_normals
+_mesa_sparc_transform_normalize_normals:
+	/* Transform by elements 0-2,4-6,8-10 of mat->inv, then normalize (lengths == NULL) or scale by lengths[i]. o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	sethi	%hi(ONE_DOT_ZERO), %g2	/* low 10 bits of the constant are zero, so %hi() alone builds it */
+	sub	%sp, 16, %sp	/* scratch slots: integer-register bits reach the FP registers via memory */
+	st	%g2, [%sp + STACK_VAR_OFF+0x0]
+	st	%o1, [%sp + STACK_VAR_OFF+0x4]	/* NOTE(review): assumes the GLfloat 'scale' arrives in integer reg %o1 -- confirm for the __sparc_v9__ (64-bit ABI) build */
+	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
+	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 cmp	%o3, 0
+	bne	4f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+1:	/* LENGTHS == NULL: normalize each transformed vector; 'scale' (f15) is not used on this path */
+	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3	! FGM Group
+	fmuls	%f1, M1, %f4	! FGM Group
+	fmuls	%f0, M4, %f5	! FGM Group
+	fmuls	%f1, M5, %f6	! FGM Group
+	fmuls	%f0, M8, %f7	! FGM Group f3 available
+	fmuls	%f1, M9, %f8	! FGM Group f4 available
+	fadds	%f3, %f4, %f3	! FGA
+	fmuls	%f2, M2, %f10	! FGM Group f5 available
+	fmuls	%f2, M6, %f0	! FGM Group f6 available
+	fadds	%f5, %f6, %f5	! FGA
+	fmuls	%f2, M10, %f4	! FGM Group f7 available
+	fadds	%f7, %f8, %f7	! FGA Group f8,f3 available
+	fadds	%f3, %f10, %f3	! FGA Group f10 available
+	fadds	%f5, %f0, %f5	! FGA Group stall f0,f5 available
+	fadds	%f7, %f4, %f7	! FGA Group stall f4,f7 available
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+	fmuls	%f3, %f3, %f6	! FGM Group f3 available
+	fmuls	%f5, %f5, %f8	! FGM Group f5 available
+	fmuls	%f7, %f7, %f10	! FGM Group f7 available
+	fadds	%f6, %f8, %f6	! FGA Group 2cyc stall f6,f8 available
+	fadds	%f6, %f10, %f6	! FGA Group 4cyc stall f6,f10 available
+
+	/* scale (f6) = 1.0 / sqrt(len) */
+	fsqrts	%f6, %f6	! FDIV 20 cycles
+	fdivs	%f12, %f6, %f6	! FDIV 14 cycles
+
+	fmuls	%f3, %f6, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
+	fmuls	%f5, %f6, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
+	fmuls	%f7, %f6, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+	ba	7f
+	 nop
+
+4:	/* LENGTHS != NULL: fold 'scale' (f15) into the matrix once, outside the loop */
+	fmuls	M0, %f15, M0
+	fmuls	M1, %f15, M1
+	fmuls	M2, %f15, M2
+	fmuls	M4, %f15, M4
+	fmuls	M5, %f15, M5
+	fmuls	M6, %f15, M6
+	fmuls	M8, %f15, M8
+	fmuls	M9, %f15, M9
+	fmuls	M10, %f15, M10
+
+5:
+	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3	! FGM Group
+	fmuls	%f1, M1, %f4	! FGM Group
+	fmuls	%f0, M4, %f5	! FGM Group
+	fmuls	%f1, M5, %f6	! FGM Group
+	fmuls	%f0, M8, %f7	! FGM Group f3 available
+	fmuls	%f1, M9, %f8	! FGM Group f4 available
+	fadds	%f3, %f4, %f3	! FGA
+	fmuls	%f2, M2, %f10	! FGM Group f5 available
+	fmuls	%f2, M6, %f0	! FGM Group f6 available
+	fadds	%f5, %f6, %f5	! FGA
+	fmuls	%f2, M10, %f4	! FGM Group f7 available
+	fadds	%f7, %f8, %f7	! FGA Group f8,f3 available
+	fadds	%f3, %f10, %f3	! FGA Group f10 available
+	ld	[%o3], %f13	! LSU
+	fadds	%f5, %f0, %f5	! FGA Group stall f0,f5 available
+	add	%o3, 4, %o3	! IEU0
+	fadds	%f7, %f4, %f7	! FGA Group stall f4,f7 available
+
+	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+	fmuls	%f3, %f13, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * len
+	fmuls	%f5, %f13, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * len
+	fmuls	%f7, %f13, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * len
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	5b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_transform_normalize_normals_no_rot
+_mesa_sparc_transform_normalize_normals_no_rot:
+	/* Multiply by elements 0,5,10 of mat->inv, then normalize (lengths == NULL) or scale by lengths[i]. o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	sethi	%hi(ONE_DOT_ZERO), %g2
+	sub	%sp, 16, %sp
+	st	%g2, [%sp + STACK_VAR_OFF+0x0]
+	st	%o1, [%sp + STACK_VAR_OFF+0x4]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
+	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	LDMATRIX_0_5_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 cmp	%o3, 0
+	bne	4f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+1:	/* LENGTHS == NULL: normalize each result; 'scale' (f15) is not used on this path */
+	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3	! FGM Group
+	fmuls	%f1, M5, %f5	! FGM Group
+	fmuls	%f2, M10, %f7	! FGM Group
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+	fmuls	%f3, %f3, %f6	! FGM Group stall, f3 available
+	fmuls	%f5, %f5, %f8	! FGM Group f5 available
+	fmuls	%f7, %f7, %f10	! FGM Group f7 available
+	fadds	%f6, %f8, %f6	! FGA Group 2cyc stall f6,f8 available
+	fadds	%f6, %f10, %f6	! FGA Group 4cyc stall f6,f10 available
+
+	/* scale (f6) = 1.0 / sqrt(len) */
+	fsqrts	%f6, %f6	! FDIV 20 cycles
+	fdivs	%f12, %f6, %f6	! FDIV 14 cycles
+
+	fmuls	%f3, %f6, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
+	fmuls	%f5, %f6, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
+	fmuls	%f7, %f6, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+	ba	7f
+	 nop
+
+4:	/* LENGTHS != NULL: fold 'scale' (f15) into m0/m5/m10 once, outside the loop */
+	fmuls	M0, %f15, M0
+	fmuls	M5, %f15, M5
+	fmuls	M10, %f15, M10
+
+5:
+	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3	! FGM Group
+	ld	[%o3], %f13	! LSU
+	fmuls	%f1, M5, %f5	! FGM Group
+	add	%o3, 4, %o3	! IEU0
+	fmuls	%f2, M10, %f7	! FGM Group
+
+	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+	fmuls	%f3, %f13, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * len
+	fmuls	%f5, %f13, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * len
+	fmuls	%f7, %f13, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * len
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	5b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_transform_rescale_normals_no_rot
+_mesa_sparc_transform_rescale_normals_no_rot:
+	/* Multiply by elements 0,5,10 of mat->inv pre-scaled by 'scale'; lengths (o3) is not read. o0=mat o1=scale o2=in o3=lengths o4=dest */
+	sub	%sp, 16, %sp
+	st	%o1, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	LDMATRIX_0_5_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+	fmuls	M0, %f15, M0
+	fmuls	M5, %f15, M5
+	fmuls	M10, %f15, M10
+
+1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3	! FGM Group
+	st	%f3, [%g3 + 0x00]	! LSU
+	fmuls	%f1, M5, %f5	! FGM Group
+	st	%f5, [%g3 + 0x04]	! LSU
+	fmuls	%f2, M10, %f7	! FGM Group
+	st	%f7, [%g3 + 0x08]	! LSU
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_transform_rescale_normals
+_mesa_sparc_transform_rescale_normals:
+	/* Transform by elements 0-2,4-6,8-10 of mat->inv pre-scaled by 'scale'; lengths (o3) is not read. o0=mat o1=scale o2=in o3=lengths o4=dest */
+	sub	%sp, 16, %sp
+	st	%o1, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+	fmuls	M0, %f15, M0
+	fmuls	M1, %f15, M1
+	fmuls	M2, %f15, M2
+	fmuls	M4, %f15, M4
+	fmuls	M5, %f15, M5
+	fmuls	M6, %f15, M6
+	fmuls	M8, %f15, M8
+	fmuls	M9, %f15, M9
+	fmuls	M10, %f15, M10
+
+1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	fmuls	%f0, M0, %f3	! FGM Group
+	fmuls	%f1, M1, %f4	! FGM Group
+	fmuls	%f0, M4, %f5	! FGM Group
+	fmuls	%f1, M5, %f6	! FGM Group
+	fmuls	%f0, M8, %f7	! FGM Group f3 available
+	fmuls	%f1, M9, %f8	! FGM Group f4 available
+	fadds	%f3, %f4, %f3	! FGA
+	fmuls	%f2, M2, %f10	! FGM Group f5 available
+	fmuls	%f2, M6, %f0	! FGM Group f6 available
+	fadds	%f5, %f6, %f5	! FGA
+	fmuls	%f2, M10, %f4	! FGM Group f7 available
+	fadds	%f7, %f8, %f7	! FGA Group f8,f3 available
+	fadds	%f3, %f10, %f3	! FGA Group f10 available
+	st	%f3, [%g3 + 0x00]	! LSU
+	fadds	%f5, %f0, %f5	! FGA Group stall f0,f5 available
+	st	%f5, [%g3 + 0x04]	! LSU
+	fadds	%f7, %f4, %f7	! FGA Group stall f4,f7 available
+	st	%f7, [%g3 + 0x08]	! LSU
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_transform_normals_no_rot
+_mesa_sparc_transform_normals_no_rot:
+	/* Multiply by elements 0,5,10 of mat->inv; scale (o1) and lengths (o3) are not read. o0=mat o1=scale o2=in o3=lengths o4=dest */
+	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	LDMATRIX_0_5_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* tx (f3) = (ux * m0)
+	 * ty (f5) = (uy * m5)
+	 * tz (f7) = (uz * m10)
+	 */
+	fmuls	%f0, M0, %f3	! FGM Group
+	st	%f3, [%g3 + 0x00]	! LSU
+	fmuls	%f1, M5, %f5	! FGM Group
+	st	%f5, [%g3 + 0x04]	! LSU
+	fmuls	%f2, M10, %f7	! FGM Group
+	st	%f7, [%g3 + 0x08]	! LSU
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_transform_normals
+_mesa_sparc_transform_normals:
+	/* Transform by elements 0-2,4-6,8-10 of mat->inv; scale (o1) and lengths (o3) are not read. o0=mat o1=scale o2=in o3=lengths o4=dest */
+	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
+	ld	[%o5 + 0x04], %f1	! uy = from[1]
+	ld	[%o5 + 0x08], %f2	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	fmuls	%f0, M0, %f3	! FGM Group
+	fmuls	%f1, M1, %f4	! FGM Group
+	fmuls	%f0, M4, %f5	! FGM Group
+	fmuls	%f1, M5, %f6	! FGM Group
+	fmuls	%f0, M8, %f7	! FGM Group f3 available
+	fmuls	%f1, M9, %f8	! FGM Group f4 available
+	fadds	%f3, %f4, %f3	! FGA
+	fmuls	%f2, M2, %f10	! FGM Group f5 available
+	fmuls	%f2, M6, %f0	! FGM Group f6 available
+	fadds	%f5, %f6, %f5	! FGA
+	fmuls	%f2, M10, %f4	! FGM Group f7 available
+	fadds	%f7, %f8, %f7	! FGA Group f8,f3 available
+	fadds	%f3, %f10, %f3	! FGA Group f10 available
+	st	%f3, [%g3 + 0x00]	! LSU
+	fadds	%f5, %f0, %f5	! FGA Group stall f0,f5 available
+	st	%f5, [%g3 + 0x04]	! LSU
+	fadds	%f7, %f4, %f7	! FGA Group stall f4,f7 available
+	st	%f7, [%g3 + 0x08]	! LSU
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_normalize_normals
+_mesa_sparc_normalize_normals:
+	/* Normalize each input (lengths == NULL) or multiply it by lengths[i]; mat (o0) and scale (o1) are not read. o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	sethi	%hi(ONE_DOT_ZERO), %g2
+	sub	%sp, 16, %sp
+	st	%g2, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
+	add	%sp, 16, %sp
+
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 cmp	%o3, 0
+	bne	4f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+1:	/* LENGTHS == NULL */
+	ld	[%o5 + 0x00], %f3	! ux = from[0]
+	ld	[%o5 + 0x04], %f5	! uy = from[1]
+	ld	[%o5 + 0x08], %f7	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+	fmuls	%f3, %f3, %f6	! FGM Group f3 available
+	fmuls	%f5, %f5, %f8	! FGM Group f5 available
+	fmuls	%f7, %f7, %f10	! FGM Group f7 available
+	fadds	%f6, %f8, %f6	! FGA Group 2cyc stall f6,f8 available
+	fadds	%f6, %f10, %f6	! FGA Group 4cyc stall f6,f10 available
+
+	/* scale (f6) = 1.0 / sqrt(len) */
+	fsqrts	%f6, %f6	! FDIV 20 cycles
+	fdivs	%f12, %f6, %f6	! FDIV 14 cycles
+
+	fmuls	%f3, %f6, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
+	fmuls	%f5, %f6, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
+	fmuls	%f7, %f6, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+	ba	7f
+	 nop
+
+4:	/* LENGTHS != NULL */
+
+5:
+	ld	[%o5 + 0x00], %f3	! ux = from[0]
+	ld	[%o5 + 0x04], %f5	! uy = from[1]
+	ld	[%o5 + 0x08], %f7	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	ld	[%o3], %f13	! LSU
+	add	%o3, 4, %o3	! IEU0
+
+	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+	fmuls	%f3, %f13, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * len
+	fmuls	%f5, %f13, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * len
+	fmuls	%f7, %f13, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * len
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	5b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
+
+	.globl	_mesa_sparc_rescale_normals
+_mesa_sparc_rescale_normals:
+	/* Multiply each input by 'scale'; mat (o0) and lengths (o3) are not read. o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+	sethi	%hi(ONE_DOT_ZERO), %g2	/* NOTE(review): result unused -- %g2 is overwritten by the in->stride load below and %f12 is never loaded here */
+	sub	%sp, 16, %sp
+	st	%o1, [%sp + STACK_VAR_OFF+0x0]
+	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
+	add	%sp, 16, %sp
+
+	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
+	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
+	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
+	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start
+
+	/* dest->count = in->count */
+	st	%g1, [%o4 + V4F_COUNT]
+
+	cmp	%g1, 1
+	bl	7f
+	 clr	%o4	! 'i' for STRIDE_LOOP
+
+1:
+	ld	[%o5 + 0x00], %f3	! ux = from[0]
+	ld	[%o5 + 0x04], %f5	! uy = from[1]
+	ld	[%o5 + 0x08], %f7	! uz = from[2]
+	add	%o5, %g2, %o5	! STRIDE_F(from, stride)
+	add	%o4, 1, %o4	! i++
+
+	/* f3=tx, f5=ty, f7=tz */
+
+	fmuls	%f3, %f15, %f3
+	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
+	fmuls	%f5, %f15, %f5
+	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
+	fmuls	%f7, %f15, %f7
+	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale
+
+	cmp	%o4, %g1	! continue if (i < count)
+	bl	1b
+	 add	%g3, 0x0c, %g3	! advance out vector pointer
+
+7:	retl
+	 nop
diff --git a/src/mesa/sparc/sparc.c b/src/mesa/sparc/sparc.c
index c25a758a73b..45e2cecac18 100644
--- a/src/mesa/sparc/sparc.c
+++ b/src/mesa/sparc/sparc.c
@@ -1,4 +1,4 @@
-/* $Id: sparc.c,v 1.3 2001/06/05 23:54:01 davem69 Exp $ */
+/* $Id: sparc.c,v 1.4 2001/06/06 11:46:04 davem69 Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -86,6 +86,22 @@ extern GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
 						   GLubyte clipMask[],
 						   GLubyte *orMask,
 						   GLubyte *andMask);
+
+#define NORM_ARGS	const GLmatrix *mat,		\
+			GLfloat scale,			\
+			const GLvector3f *in,		\
+			const GLfloat *lengths,		\
+			GLvector3f *dest
+
+extern void _mesa_sparc_transform_normalize_normals(NORM_ARGS);
+extern void _mesa_sparc_transform_normalize_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_rescale_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_rescale_normals(NORM_ARGS);
+extern void _mesa_sparc_transform_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_normals(NORM_ARGS);
+extern void _mesa_sparc_normalize_normals(NORM_ARGS);
+extern void _mesa_sparc_rescale_normals(NORM_ARGS);
+
 #endif
 
 void _mesa_init_all_sparc_transform_asm(void)
@@ -99,9 +115,27 @@ void _mesa_init_all_sparc_transform_asm(void)
    _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
    _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;
 
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] =
+	   _mesa_sparc_transform_normalize_normals;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] =
+	   _mesa_sparc_transform_normalize_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
+	   _mesa_sparc_transform_rescale_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
+	   _mesa_sparc_transform_rescale_normals;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
+	   _mesa_sparc_transform_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM] =
+	   _mesa_sparc_transform_normals;
+   _mesa_normal_tab[NORM_NORMALIZE] =
+	   _mesa_sparc_normalize_normals;
+   _mesa_normal_tab[NORM_RESCALE] =
+	   _mesa_sparc_rescale_normals;
+
 #ifdef DEBUG
    _math_test_all_transform_functions("sparc");
    _math_test_all_cliptest_functions("sparc");
+   _math_test_all_normal_transform_functions("sparc");
 #endif
 
 #endif
diff --git a/src/mesa/sparc/sparc_matrix.h b/src/mesa/sparc/sparc_matrix.h
index 85beef9d927..4fe09e83eaf 100644
--- a/src/mesa/sparc/sparc_matrix.h
+++ b/src/mesa/sparc/sparc_matrix.h
@@ -1,4 +1,4 @@
-/* $Id: sparc_matrix.h,v 1.2 2001/06/05 23:54:01 davem69 Exp $ */
+/* $Id: sparc_matrix.h,v 1.3 2001/06/06 11:46:04 davem69 Exp $ */
 
 #ifndef _SPARC_MATRIX_H
 #define _SPARC_MATRIX_H
@@ -146,6 +146,11 @@
 	ldd	[BASE + (12 * 0x4)], M12; \
 	ld	[BASE + (14 * 0x4)], M14
 
+#define LDMATRIX_0_5_10(BASE) \
+	ld	[BASE + ( 0 * 0x4)], M0; \
+	ld	[BASE + ( 5 * 0x4)], M5; \
+	ld	[BASE + (10 * 0x4)], M10; /* NOTE(review): the trailing continuation extends this macro onto the blank line below */ \
+
 #define LDMATRIX_0_5_10_12_13_14(BASE) \
 	ld	[BASE + ( 0 * 0x4)], M0; \
 	ld	[BASE + ( 5 * 0x4)], M5; \
-- 
cgit v1.2.3