/*
 * path: root/src/mesa/sparc/sparc_clip.S
 * blob: 11aa8324ec25aa74cc3b241add49150a4a3fbb25
 */
/*
 * Clip testing in SPARC assembly
 */

/*
 * Pointer width selection.
 *
 * LDPTR is the load instruction used to fetch a full pointer (ldx when
 * __arch64__ is set, ld otherwise).  MATH_ASM_PTR_SIZE has to be defined
 * before math/m_vector_asm.h is pulled in (presumably the V4F_* offsets
 * are derived from it -- confirm against that header).
 */
#if __arch64__
#define LDPTR		ldx
#define MATH_ASM_PTR_SIZE 8
#else
#define LDPTR		ld
#define MATH_ASM_PTR_SIZE 4
#endif
#include "math/m_vector_asm.h"

/* Bit masks for a vector flags word.  VEC_SIZE_4 is OR-ed into
 * V4F_FLAGS by _mesa_sparc_cliptest_points4. */
#define VEC_SIZE_1   	1
#define VEC_SIZE_2   	3
#define VEC_SIZE_3   	7
#define VEC_SIZE_4   	15

	/* Declare %g2 and %g3 as scratch registers to the assembler;
	 * both are used as temporaries by the routines in this file. */
        .register %g2, #scratch
        .register %g3, #scratch

	.text
	.align		64

	/* Constant 1.0f.  The cliptest routines locate this word
	 * PC-relatively and then add 4 to reach clip_table, so
	 * clip_table must stay immediately after it.
	 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The cliptest loops treat the four floats of a vertex
	 * as raw 32-bit integers and shift carry bits into an
	 * index register, most significant bit first:
	 *
	 *   bit 6: sign bit of w
	 *   bit 5: sign bit of z
	 *   bit 4: borrow from (w << 1) - (z << 1), i.e. set when
	 *          the sign-stripped bits of z compare above
	 *          those of w as unsigned integers
	 *   bit 3: sign bit of y
	 *   bit 2: same borrow test for y against w
	 *   bit 1: sign bit of x
	 *   bit 0: same borrow test for x against w
	 *
	 * The resulting value (0..127) selects the clip mask
	 * byte for the vertex from the 128 entries below.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

/* GLvector4f *clip_vec, GLvector4f *proj_vec, 
   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
   GLboolean viewport_z_enable */

	.align		64
	/* Do-nothing leaf routine.  Calling it leaves the address of
	 * the call instruction in %o7; the cliptest entry points use
	 * that value in the call delay slot to compute the address of
	 * one_dot_zero without any relocation.
	 */
__pc_tramp:
	jmpl		%o7 + 8, %g0	! leaf return, same encoding as retl
	 nop

	/*
	 * _mesa_sparc_cliptest_points4
	 *
	 * Incoming arguments, as seen after the save:
	 *   %i0  clip_vec           source vector
	 *   %i1  proj_vec           destination vector, also returned
	 *   %i2  clipMask           one mask byte is written per vertex
	 *   %i3  orMask             byte, read on entry and rewritten
	 *   %i4  andMask            byte, read on entry and rewritten
	 *   %i5  viewport_z_enable  not examined; the register is
	 *                           reused as the output pointer
	 *
	 * For every vertex of clip_vec a clip mask is looked up in
	 * clip_table.  A vertex with a non-zero mask is counted,
	 * merged into the running or/and masks and gets (0, 0, 0, 1.0)
	 * written to proj_vec; a vertex with a zero mask gets
	 * (x/w, y/w, z/w, 1/w).  On exit *orMask holds the merged or
	 * mask and *andMask holds the merged and mask, or 0 when at
	 * least one vertex had a zero clip mask.
	 *
	 * The instruction order inside the loop follows the issue
	 * group annotations carried in the trailing comments.
	 */
	.globl		_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	/* The frame must be able to hold a spilled register window.
	 * With 64-bit registers that is 128 bytes, so the 64-byte
	 * frame used for 32-bit builds would let a window spill
	 * overwrite the save area of the caller.  Use the 176-byte
	 * minimum frame of the 64-bit ABI there.
	 */
#if __arch64__
	save		%sp, -176, %sp
#else
	save		%sp, -64, %sp
#endif
	/* %o7 = address of the call; the delay slot turns it into
	 * the address of one_dot_zero. */
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1
	ld		[%g1 + 0x0], %f4	/* f4 = 1.0f */
	add		%g1, 0x4, %g1		/* g1 = clip_table */

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5
	/* g2 = (*andMask << 8) | *orMask */
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2

	/* Fill in the header of proj_vec.
	 * NOTE(review): the size field is set to 3 although four
	 * components are stored per vertex and VEC_SIZE_4 is OR-ed
	 * into the flags -- confirm that this is intended.
	 */
	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%l3, [%i1 + V4F_COUNT]
	clr		%l2

	/* The loop below tests its exit condition at the bottom with
	 * bne, so a zero count would process a vertex and then keep
	 * going until the counter wraps.  Skip straight to the
	 * epilogue in that case; with c == count == 0 it stores the
	 * incoming masks back unchanged.
	 */
	cmp		%l3, 0
	be		4f
	 clr		%l0

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 */

	/* Start 1.0 / w in the FPU and, in parallel, build the
	 * clip_table index in %g3 from the raw bits of w, z, y, x:
	 * each addcc shifts a sign bit into the carry, each subcc
	 * sets the carry when the doubled component compares above
	 * the doubled w, and each addx shifts the carry into %g3.
	 */
1:	ld		[%i0 + 0x0c], %f3	! LSU	Group
	ld		[%i0 + 0x0c], %g5	! LSU	Group
	ld		[%i0 + 0x08], %g4	! LSU	Group
	fdivs		%f4, %f3, %f8		! FGM
	addcc		%g5, %g5, %g5		! IEU1	Group
	addx		%g0, 0x0, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x04], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x00], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group
	/* The mask byte is stored from the delay slot on both paths. */
	cmp		%g3, 0			! IEU1	Group, stall
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU
	/* Non-zero mask: c++, tmpOrMask |= mask, tmpAndMask &= mask,
	 * and the projected vertex becomes (0, 0, 0, 1.0).  %g4 is
	 * (mask << 8) | 0xff so that the and leaves the or byte alone.
	 */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0
	st		%g0, [%i5 + 0x00]	! LSU
	or		%g4, 0xff, %g4		! IEU0	Group
	or		%g2, %g3, %g2		! IEU1
	st		%g0, [%i5 + 0x04]	! LSU
	and		%g2, %g4, %g2		! IEU0	Group
	st		%g0, [%i5 + 0x08]	! LSU
	b		3f			! CTI
	 st		%f4, [%i5 + 0x0c]	! LSU	Group
	/* Zero mask: scale x, y, z by f8 = 1.0 / w and store f8 as
	 * the fourth component. */
2:	ld		[%i0 + 0x00], %f0	! LSU	Group
	ld		[%i0 + 0x04], %f1	! LSU	Group
	ld		[%i0 + 0x08], %f2	! LSU	Group
	fmuls		%f0, %f8, %f0		! FGM
	st		%f0, [%i5 + 0x00]	! LSU	Group
	fmuls		%f1, %f8, %f1		! FGM
	st		%f1, [%i5 + 0x04]	! LSU	Group
	fmuls		%f2, %f8, %f2		! FGM
	st		%f2, [%i5 + 0x08]	! LSU	Group
	st		%f8, [%i5 + 0x0c]	! LSU	Group
	/* Advance output pointer, index, mask pointer and (in the
	 * delay slot) the source pointer; loop until i == count. */
3:	add		%i5, 0x10, %i5		! IEU1
	add		%l0, 1, %l0		! IEU0	Group
	add		%i2, 1, %i2		! IEU0	Group
	cmp		%l0, %l3		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group
	/* *orMask = tmpOrMask; *andMask = (c < count) ? 0 : tmpAndMask.
	 * The annulled delay slot clears %g3 only when the branch is
	 * taken, i.e. when c < count. */
4:	stb		%g2, [%i3]		! LSU
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %l3		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group
	/* Return proj_vec. */
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0

	/*
	 * _mesa_sparc_cliptest_points4_np
	 *
	 * Same clip mask computation as _mesa_sparc_cliptest_points4
	 * but without producing projected vertices.
	 *
	 * Incoming arguments, as seen after the save:
	 *   %i0  clip_vec           source vector
	 *   %i1  proj_vec           never dereferenced, returned as is
	 *   %i2  clipMask           one mask byte is written per vertex
	 *   %i3  orMask             byte, read on entry and rewritten
	 *   %i4  andMask            byte, read on entry and rewritten
	 *   %i5  viewport_z_enable  not examined
	 *
	 * On exit *orMask holds the merged or mask and *andMask holds
	 * the merged and mask, or 0 when at least one vertex had a
	 * zero clip mask.
	 */
	.globl		_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	/* The frame must be able to hold a spilled register window.
	 * With 64-bit registers that is 128 bytes, so the 64-byte
	 * frame used for 32-bit builds would let a window spill
	 * overwrite the save area of the caller.  Use the 176-byte
	 * minimum frame of the 64-bit ABI there.
	 */
#if __arch64__
	save		%sp, -176, %sp
#else
	save		%sp, -64, %sp
#endif

	/* %o7 = address of the call; the delay slot turns it into
	 * the address of one_dot_zero, and clip_table sits 4 bytes
	 * after that. */
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1
	add		%g1, 0x4, %g1

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	/* g2 = (*andMask << 8) | *orMask */
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2

	clr		%l2

	/* The loop below tests its exit condition at the bottom with
	 * bne, so a zero count would process a vertex and then keep
	 * going until the counter wraps.  Skip straight to the
	 * epilogue in that case; with c == count == 0 it stores the
	 * incoming masks back unchanged.
	 */
	cmp		%l3, 0
	be		3f
	 clr		%l0

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 */

	/* Build the clip_table index in %g3 from the raw bits of
	 * w, z, y, x: each addcc shifts a sign bit into the carry,
	 * each subcc sets the carry when the doubled component
	 * compares above the doubled w, and each addx shifts the
	 * carry into %g3.
	 */
1:	ld		[%i0 + 0x0c], %g5	! LSU	Group
	ld		[%i0 + 0x08], %g4	! LSU	Group
	addcc		%g5, %g5, %g5		! IEU1	Group
	addx		%g0, 0x0, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x04], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x00], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group
	/* The mask byte is stored from the delay slot on both paths. */
	cmp		%g3, 0			! IEU1	Group, stall
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU
	/* Non-zero mask: c++, tmpOrMask |= mask, tmpAndMask &= mask.
	 * %g4 is (mask << 8) | 0xff so that the and leaves the or
	 * byte alone. */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0
	or		%g4, 0xff, %g4		! IEU0	Group
	or		%g2, %g3, %g2		! IEU1
	and		%g2, %g4, %g2		! IEU0	Group
	/* Advance index, mask pointer and (in the delay slot) the
	 * source pointer; loop until i == count. */
2:	add		%l0, 1, %l0		! IEU0	Group
	add		%i2, 1, %i2		! IEU0	Group
	cmp		%l0, %l3		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group
	/* *orMask = tmpOrMask; *andMask = (c < count) ? 0 : tmpAndMask.
	 * The annulled delay slot clears %g3 only when the branch is
	 * taken, i.e. when c < count. */
3:	stb		%g2, [%i3]		! LSU
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %l3		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group
	/* Return the proj_vec argument unchanged. */
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0