summaryrefslogtreecommitdiffstats
path: root/src/mesa/x86/mmx_blendtmp.h
diff options
context:
space:
mode:
authorJose Fonseca <[email protected]>2002-04-18 11:57:28 +0000
committerJose Fonseca <[email protected]>2002-04-18 11:57:28 +0000
commit55d9ee83b4c29e8f7c373ee6326bbb4f77402bee (patch)
tree6298e1e0caa0bebe42f3fcf140ddd901e9c5d115 /src/mesa/x86/mmx_blendtmp.h
parent9ff3e9d992bcd7b195feb39a2aacc7d0ea43bd5a (diff)
Definition of several utility macros for self-contained MMX operations such as scaling and lerping.
Restructured the MMX blending function to use a template, so that it is only necessary to specify the main loop, which is also used to generate the runin and runout sections. Optimized the MMX function after remembering that multiplication is commutative (how can somebody forget this...), resulting in less register usage. Now there is no need to generate or read from memory any constant inside the loop. Assemblers other than the GNU assembler can choke on the output of the C preprocessor, since it was necessary to add line separators ';' to the defined macros.
Diffstat (limited to 'src/mesa/x86/mmx_blendtmp.h')
-rw-r--r--src/mesa/x86/mmx_blendtmp.h113
1 files changed, 113 insertions, 0 deletions
diff --git a/src/mesa/x86/mmx_blendtmp.h b/src/mesa/x86/mmx_blendtmp.h
new file mode 100644
index 00000000000..395436ba015
--- /dev/null
+++ b/src/mesa/x86/mmx_blendtmp.h
@@ -0,0 +1,113 @@
+/*
+ * Written by José Fonseca <[email protected]>
+ */
+
+
+/*
+ * void _mesa_mmx_blend( GLcontext *ctx,
+ * GLuint n,
+ * const GLubyte mask[],
+ * GLchan rgba[][4],
+ * CONST GLchan dest[][4] )
+ * Template body: TAG, INIT and MAIN are not defined here (supplied by the includer) and are #undef'd at the end.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( TAG(_mesa_mmx_blend) )
+
+GLNAME( TAG(_mesa_mmx_blend) ):
+ /* prologue: build the EBP frame and save ESI, EDI, EBX, which hold dest, rgba and mask below */
+ PUSH_L ( EBP )
+ MOV_L ( ESP, EBP )
+ PUSH_L ( ESI )
+ PUSH_L ( EDI )
+ PUSH_L ( EBX )
+ /* arguments are read relative to EBP; the first one (ctx, presumably at offset 8) is not read in this file */
+ MOV_L ( REGOFF(12, EBP), ECX ) /* n */
+ CMP_L ( CONST(0), ECX) /* n == 0: nothing to do */
+ JE ( LLBL ( TAG(GMB_return) ) ) /* leaves before INIT, so EMMS is skipped as well */
+
+ MOV_L ( REGOFF(16, EBP), EBX ) /* mask */
+ MOV_L ( REGOFF(20, EBP), EDI ) /* rgba */
+ MOV_L ( REGOFF(24, EBP), ESI ) /* dest */
+
+ INIT /* includer-supplied setup, executed once before any MAIN section */
+
+ TEST_L ( CONST(4), EDI ) /* align rgba on an 8-byte boundary: bit 2 clear means no run-in (assumes rgba is 4-byte aligned -- TODO confirm) */
+ JZ ( LLBL ( TAG(GMB_align_end) ) )
+
+ CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */
+ JE ( LLBL ( TAG(GMB_align_continue) ) ) /* masked out: skip MAIN but still advance by one pixel */
+
+ /* runin: one pixel; ONE(x) keeps x and TWO(x) drops it (MAIN is presumably written in terms of these) */
+#define ONE(x) x
+#define TWO(x)
+ MAIN
+#undef ONE
+#undef TWO
+
+LLBL ( TAG(GMB_align_continue) ):
+
+ DEC_L ( ECX ) /* n -= 1 */
+ INC_L ( EBX ) /* mask += 1 */
+ ADD_L ( CONST(4), EDI ) /* rgba += 1 */
+ ADD_L ( CONST(4), ESI ) /* dest += 1 */
+
+LLBL ( TAG(GMB_align_end) ):
+
+ CMP_L ( CONST(2), ECX)
+ JB ( LLBL ( TAG(GMB_loop_end) ) ) /* fewer than two pixels left (unsigned compare): go to the runout */
+
+ALIGNTEXT16
+LLBL ( TAG(GMB_loop_begin) ):
+
+ CMP_W ( CONST(0), REGIND(EBX) ) /* *mask == 0 && *(mask + 1) == 0 */
+ JE ( LLBL ( TAG(GMB_loop_continue) ) ) /* both pixels masked out: skip MAIN */
+ /* NOTE(review): if only one of the two mask bytes is set, MAIN still runs for the pair unless MAIN checks the mask itself -- confirm */
+ /* main loop: two pixels per iteration; here TWO(x) keeps x and ONE(x) drops it */
+#define ONE(x)
+#define TWO(x) x
+ MAIN
+#undef ONE
+#undef TWO
+
+LLBL ( TAG(GMB_loop_continue) ):
+
+ DEC_L ( ECX )
+ DEC_L ( ECX ) /* n -= 2 */
+ ADD_L ( CONST(2), EBX ) /* mask += 2 */
+ ADD_L ( CONST(8), EDI ) /* rgba += 2 */
+ ADD_L ( CONST(8), ESI ) /* dest += 2 */
+ CMP_L ( CONST(2), ECX )
+ JAE ( LLBL ( TAG(GMB_loop_begin) ) ) /* repeat while n >= 2 (unsigned compare) */
+
+LLBL ( TAG(GMB_loop_end) ):
+
+ CMP_L ( CONST(1), ECX )
+ JB ( LLBL ( TAG(GMB_done) ) ) /* n == 0: no leftover pixel */
+
+ CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */
+ JE ( LLBL ( TAG(GMB_done) ) ) /* leftover pixel is masked out */
+
+ /* runout: the single remaining pixel, using the same ONE/TWO selection as the runin */
+#define ONE(x) x
+#define TWO(x)
+ MAIN
+#undef ONE
+#undef TWO
+
+LLBL ( TAG(GMB_done) ):
+
+ EMMS /* empty the MMX state so later x87 floating-point code works */
+
+LLBL ( TAG(GMB_return) ):
+ /* epilogue: restore the saved registers in reverse push order, then the caller's frame */
+ POP_L ( EBX )
+ POP_L ( EDI )
+ POP_L ( ESI )
+ MOV_L ( EBP, ESP )
+ POP_L ( EBP )
+ RET
+
+#undef TAG
+#undef INIT
+#undef MAIN