summaryrefslogtreecommitdiffstats
path: root/src/mesa/x86/rtasm/x86sse.h
diff options
context:
space:
mode:
authorKeith Whitwell <[email protected]>2005-06-07 12:44:26 +0000
committerKeith Whitwell <[email protected]>2005-06-07 12:44:26 +0000
commit461a2a799a99a8c8aba7e09c96d7c2e5c3196dcf (patch)
tree44aeed42bc90c9b7ecbba111a15c320c752a4491 /src/mesa/x86/rtasm/x86sse.h
parent757e0855adb1b1eb45b55e1fcf6acb47224b2853 (diff)
New files - split off runtime assembly functions from
t_vertex_sse.c.
Diffstat (limited to 'src/mesa/x86/rtasm/x86sse.h')
-rw-r--r--src/mesa/x86/rtasm/x86sse.h257
1 files changed, 257 insertions, 0 deletions
diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h
new file mode 100644
index 00000000000..8d48e356479
--- /dev/null
+++ b/src/mesa/x86/rtasm/x86sse.h
@@ -0,0 +1,257 @@
+
+#ifndef _X86SSE_H_
+#define _X86SSE_H_
+
+#if defined(USE_X86_ASM)
+
+#include "glheader.h"
+
+/* It is up to the caller to ensure that instructions issued are
+ * suitable for the host cpu. There are no checks made in this module
+ * for mmx/sse/sse2 support on the cpu.
+ */
+struct x86_reg { /* one operand (register or register+displacement); the four bitfields total 32 bits */
+ GLuint file:3; /* register file - presumably an enum x86_reg_file value (cf. x86_make_reg) */
+ GLuint idx:3; /* register number 0..7 - presumably an enum x86_reg_name value (cf. x86_make_reg) */
+ GLuint mod:2; /* an enum x86_reg_mod value; mod_REG if this is just a register */
+ GLint disp:24; /* displacement (cf. x86_make_disp); 24-bit field, so only +/- 23bits of offset - should be enough... */
+};
+
+struct x86_function { /* per-function state passed to every emitter declared in this header */
+ GLubyte *store; /* presumably the start of the generated-code buffer - TODO confirm in the implementation */
+ GLubyte *csr; /* presumably the current write position ("cursor") in that buffer - TODO confirm */
+ GLuint stack_offset; /* presumably tracks push/pop activity so x86_fn_arg() can find arguments - TODO confirm */
+ GLint need_emms; /* presumably non-zero when an emms is still owed after MMX use - TODO confirm */
+};
+
+enum x86_reg_file { /* which register set a struct x86_reg refers to; must fit the 3-bit 'file' field */
+ file_REG32, /* 0 - by name, the 32-bit general-purpose registers */
+ file_MMX, /* 1 - by name, the MMX registers */
+ file_XMM /* 2 - by name, the SSE XMM registers */
+};
+
+/* Values for mod field of modr/m byte. The implicit values 0..3 are
+ * the 2-bit hardware encoding of that field, so do not reorder. */
+enum x86_reg_mod {
+ mod_INDIRECT, /* 0 (00b): memory at [reg], no displacement */
+ mod_DISP8, /* 1 (01b): memory at [reg + 8-bit displacement] */
+ mod_DISP32, /* 2 (10b): memory at [reg + 32-bit displacement] */
+ mod_REG /* 3 (11b): the operand is the register itself */
+};
+
+enum x86_reg_name { /* implicit values 0..7 are the x86 hardware register numbers - do not reorder */
+ reg_AX, /* 0 */
+ reg_CX, /* 1 */
+ reg_DX, /* 2 */
+ reg_BX, /* 3 */
+ reg_SP, /* 4 */
+ reg_BP, /* 5 */
+ reg_SI, /* 6 */
+ reg_DI /* 7 */
+};
+
+
+enum x86_cc { /* implicit values 0..5 follow the x86 condition-code encoding order - do not reorder */
+ cc_O, /* 0: overflow */
+ cc_NO, /* 1: not overflow */
+ cc_NAE, /* 2: not above or equal / carry */
+ cc_AE, /* 3: above or equal / not carry */
+ cc_E, /* 4: equal / zero */
+ cc_NE /* 5: not equal / not zero */
+};
+
+#define cc_Z cc_E /* alias: "zero" is the same condition as "equal" */
+#define cc_NZ cc_NE /* alias: "not zero" is the same condition as "not equal" */
+
+/* Begin/end/retrieve function creation:
+ */
+
+
+void x86_init_func( struct x86_function *p );
+void x86_release_func( struct x86_function *p );
+void (*x86_get_func( struct x86_function *p ))( void ); /* returns a function pointer of type void (*)(void); presumably the generated code - callers cast as needed */
+
+
+
+/* Create and manipulate registers and regmem values. Everything in
+ * this group takes and returns struct x86_reg by value: */
+struct x86_reg x86_make_reg( enum x86_reg_file file,
+ enum x86_reg_name idx );
+
+struct x86_reg x86_make_disp( struct x86_reg reg,
+ GLint disp ); /* NOTE(review): struct x86_reg keeps disp in a 24-bit field, so larger offsets presumably cannot be represented */
+
+struct x86_reg x86_deref( struct x86_reg reg ); /* by name, makes a memory reference through reg - TODO confirm in the implementation */
+
+struct x86_reg x86_get_base_reg( struct x86_reg reg ); /* by name, recovers the plain register underlying a regmem value - TODO confirm */
+
+
+
+
+/* Labels, jumps and fixup:
+ */
+GLubyte *x86_get_label( struct x86_function *p ); /* presumably the current emit position, usable later as a jump target - TODO confirm */
+
+void x86_jcc( struct x86_function *p,
+ enum x86_cc cc,
+ GLubyte *label ); /* label: presumably a value obtained earlier from x86_get_label() */
+
+/* Always use a 32bit offset for forward jumps:
+ */
+GLubyte *x86_jcc_forward( struct x86_function *p,
+ enum x86_cc cc ); /* the return value is presumably what x86_fixup_fwd_jump() expects as 'fixup' */
+
+/* Fixup offset from forward jump:
+ */
+void x86_fixup_fwd_jump( struct x86_function *p,
+ GLubyte *fixup ); /* fixup: presumably the pointer returned by x86_jcc_forward() - TODO confirm */
+
+void x86_push( struct x86_function *p,
+ struct x86_reg reg ); /* NOTE(review): push/pop presumably adjust p->stack_offset (see x86_fn_arg) - confirm */
+
+void x86_pop( struct x86_function *p,
+ struct x86_reg reg ); /* counterpart of x86_push; same stack_offset caveat */
+
+void x86_inc( struct x86_function *p,
+ struct x86_reg reg ); /* single-operand emitter; by name, the INC instruction */
+
+void x86_dec( struct x86_function *p,
+ struct x86_reg reg ); /* single-operand emitter; by name, the DEC instruction */
+
+void x86_ret( struct x86_function *p ); /* no operands; by name, the RET instruction */
+
+void mmx_emms( struct x86_function *p ); /* no operands; by name, EMMS - presumably related to p->need_emms */
+
+void x86_mov( struct x86_function *p, /* every two-operand emitter in this header takes (p, dst, src) in this order */
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void x86_xor( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void x86_cmp( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse2_movd( struct x86_function *p, /* the mmx_, sse_ and sse2_ prefixes presumably name the cpu feature required; this module does not check for it */
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void mmx_movd( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void mmx_movq( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movss( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movaps( struct x86_function *p, /* NOTE(review): by mnemonic this is the aligned move; alignment of memory operands is presumably the caller's job */
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movups( struct x86_function *p, /* by mnemonic, the unaligned counterpart of sse_movaps */
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movhps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movlps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+/* SSE operations often only have one format, with dest constrained to
+ * be a register. The mmx_ entries at the end of this group are listed here too:
+ */
+void sse_mulps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_addps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movhlps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_movlhps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse2_cvtps2dq( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse2_packssdw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse2_packsswb( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse2_packuswb( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void sse_cvtps2pi( struct x86_function *p, /* NOTE(review): by mnemonic the destination is an MMX register, so dst is presumably file_MMX here - confirm */
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void mmx_packssdw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void mmx_packuswb( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+
+/* Load effective address (this heading describes x86_lea only):
+ */
+void x86_lea( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src );
+
+void x86_test( struct x86_function *p, /* by name, the TEST instruction - not an address computation */
+ struct x86_reg dst,
+ struct x86_reg src );
+
+/* Perform a reduced swizzle in a single sse instruction:
+ */
+void sse2_pshufd( struct x86_function *p,
+ struct x86_reg dest,
+ struct x86_reg arg0,
+ GLubyte x, /* x, y, z, w: presumably the source component (0..3) selected for each destination component - TODO confirm */
+ GLubyte y,
+ GLubyte z,
+ GLubyte w );
+
+
+/* Shufps can also be used to implement a reduced swizzle when dest ==
+ * arg0.
+ */
+void sse_shufps( struct x86_function *p,
+ struct x86_reg dest,
+ struct x86_reg arg0,
+ GLubyte x, /* same parameter list as sse2_pshufd; selector semantics presumably match - TODO confirm */
+ GLubyte y,
+ GLubyte z,
+ GLubyte w );
+
+
+/* Retrieve a reference to one of the function arguments, taking into
+ * account any push/pop activity:
+ */
+struct x86_reg x86_fn_arg( struct x86_function *p,
+ GLuint arg ); /* NOTE(review): whether arg counts from 0 or 1 is not visible in this header - check the implementation */
+
+#endif /* USE_X86_ASM */
+#endif /* _X86SSE_H_ */