summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeith Whitwell <[email protected]>2008-02-15 13:37:01 +0000
committerKeith Whitwell <[email protected]>2008-02-15 13:37:01 +0000
commitb29d8d27292c2ad956d3f0a307603f00ee01af28 (patch)
treefcd90e0489cd26141baa2074847e2a114e5ecdc4
parente822e09b89407d6cb8cd4a79e1c5c1e0955caf64 (diff)
draw: subclass vertex shaders according to execution method
Create new files for shaders compiled/executed with llvm, sse, exec respectively
-rw-r--r--src/gallium/auxiliary/draw/Makefile2
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h40
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_shader.c229
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h50
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c186
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c237
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c251
-rw-r--r--src/mesa/sources3
8 files changed, 766 insertions, 232 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 451911a3545..fe9b150f304 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -1,2 +1,2 @@
default:
- cd .. ; make
+ cd ../../../mesa ; make
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 3d09aef87c1..bc11259cb21 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -128,13 +128,25 @@ struct draw_stage
* Private version of the compiled vertex_shader
*/
struct draw_vertex_shader {
+
+ /* This member will disappear shortly:
+ */
const struct pipe_shader_state *state;
-#if defined(__i386__) || defined(__386__)
- struct x86_function sse2_program;
-#endif
-#ifdef MESA_LLVM
- struct gallivm_prog *llvm_prog;
-#endif
+
+ void (*prepare)( struct draw_vertex_shader *shader,
+ struct draw_context *draw );
+
+ /* Run the shader - this interface will get cleaned up in the
+ * future:
+ */
+ void (*run)( struct draw_vertex_shader *shader,
+ struct draw_context *draw,
+ const unsigned *elts,
+ unsigned count,
+ struct vertex_header *vOut[] );
+
+
+ void (*delete)( struct draw_vertex_shader * );
};
@@ -176,7 +188,7 @@ struct draw_context
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
- const struct draw_vertex_shader *vertex_shader;
+ struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
@@ -201,6 +213,7 @@ struct draw_context
boolean convert_wide_points; /**< convert wide points to tris? */
boolean convert_wide_lines; /**< convert side lines to tris? */
+ boolean use_sse;
unsigned reduced_prim;
@@ -255,11 +268,10 @@ struct draw_context
unsigned queue_nr;
} pq;
- int use_sse : 1;
-#ifdef MESA_LLVM
- struct gallivm_cpu_engine *engine;
-#endif
-
+
+ /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
+ */
+ struct gallivm_cpu_engine *engine;
void *driver_private;
};
@@ -290,11 +302,7 @@ extern void draw_vertex_cache_invalidate( struct draw_context *draw );
extern void draw_vertex_cache_unreference( struct draw_context *draw );
extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw );
-
extern void draw_vertex_shader_queue_flush( struct draw_context *draw );
-#ifdef MESA_LLVM
-extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw );
-#endif
struct tgsi_exec_machine;
diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c
index 9413f8b43a4..f68f6e32440 100644
--- a/src/gallium/auxiliary/draw/draw_vertex_shader.c
+++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c
@@ -33,177 +33,10 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
-#if defined(__i386__) || defined(__386__)
-#include "tgsi/exec/tgsi_sse2.h"
-#endif
#include "draw_private.h"
#include "draw_context.h"
+#include "draw_vs.h"
-#include "x86/rtasm/x86sse.h"
-#include "llvm/gallivm.h"
-
-
-#define DBG_VS 0
-
-
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
-
-typedef void (XSTDCALL *codegen_function) (
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4],
- struct tgsi_exec_vector *temporary );
-
-
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
- */
-static void
-run_vertex_program(struct draw_context *draw,
- unsigned elts[4], unsigned count,
- struct vertex_header *vOut[])
-{
- struct tgsi_exec_machine *machine = &draw->machine;
- unsigned int j;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(count <= 4);
- assert(draw->vertex_shader->state->output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- /* Consts does not require 16 byte alignment. */
- machine->Consts = (float (*)[4]) draw->user.constants;
-
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- machine->Outputs = ALIGN16_ASSIGN(outputs);
-
- draw->vertex_fetch.fetch_func( draw, machine, elts, count );
-
- /* run shader */
-#ifdef MESA_LLVM
- if (1) {
- struct gallivm_prog *prog = draw->vertex_shader->llvm_prog;
- gallivm_cpu_vs_exec(prog,
- machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps);
- } else
-#elif defined(__i386__) || defined(__386__)
- if (draw->use_sse) {
- /* SSE */
- /* cast away const */
- struct draw_vertex_shader *shader
- = (struct draw_vertex_shader *)draw->vertex_shader;
- codegen_function func
- = (codegen_function) x86_get_func( &shader->sse2_program );
-
- if (func)
- func(
- machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps );
- else
- /* interpreter */
- tgsi_exec_machine_run( machine );
- }
- else
-#endif
- {
- /* interpreter */
- tgsi_exec_machine_run( machine );
- }
-
- /* store machine results */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
-
- /* Handle attr[0] (position) specially:
- *
- * XXX: Computing the clipmask should be done in the vertex
- * program as a set of DP4 instructions appended to the
- * user-provided code.
- */
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
- vOut[j]->edgeflag = 1;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
-
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
-
-#if DBG_VS
- debug_printf("output[%d]win: %f %f %f %f\n", j,
- vOut[j]->data[0][0],
- vOut[j]->data[0][1],
- vOut[j]->data[0][2],
- vOut[j]->data[0][3]);
-#endif
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
-#if DBG_VS
- debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot,
- vOut[j]->data[slot][0],
- vOut[j]->data[slot][1],
- vOut[j]->data[slot][2],
- vOut[j]->data[slot][3]);
-#endif
- }
- } /* loop over vertices */
-}
/**
@@ -213,13 +46,14 @@ run_vertex_program(struct draw_context *draw,
void
draw_vertex_shader_queue_flush(struct draw_context *draw)
{
+ struct draw_vertex_shader *shader = draw->vertex_shader;
unsigned i;
assert(draw->vs.queue_nr != 0);
/* XXX: do this on statechange:
*/
- draw_update_vertex_fetch( draw );
+ shader->prepare( shader, draw );
// fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
@@ -242,7 +76,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw)
assert(n > 0);
assert(n <= 4);
- run_vertex_program(draw, elts, n, dests);
+ shader->run(shader, draw, elts, n, dests);
}
draw->vs.queue_nr = 0;
@@ -255,43 +89,16 @@ draw_create_vertex_shader(struct draw_context *draw,
{
struct draw_vertex_shader *vs;
- vs = CALLOC_STRUCT( draw_vertex_shader );
- if (vs == NULL) {
- return NULL;
- }
-
- vs->state = shader;
+ vs = draw_create_vs_llvm( draw, shader );
+ if (vs)
+ return vs;
-#ifdef MESA_LLVM
- struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
- gallivm_ir_set_layout(ir, GALLIVM_SOA);
- gallivm_ir_set_components(ir, 4);
- gallivm_ir_fill_from_tgsi(ir, shader->tokens);
- vs->llvm_prog = gallivm_ir_compile(ir);
- gallivm_ir_delete(ir);
-
- draw->engine = gallivm_global_cpu_engine();
- if (!draw->engine) {
- draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
- }
- else {
- gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
- }
-#elif defined(__i386__) || defined(__386__)
- if (draw->use_sse) {
- /* cast-away const */
- struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
-
- x86_init_func( &vs->sse2_program );
- if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens,
- &vs->sse2_program )) {
- x86_release_func( (struct x86_function *) &vs->sse2_program );
- fprintf(stdout /*err*/,
- "tgsi_emit_sse2() failed, falling back to interpreter\n");
- }
- }
-#endif
+ vs = draw_create_vs_sse( draw, shader );
+ if (vs)
+ return vs;
+ vs = draw_create_vs_exec( draw, shader );
+ assert(vs);
return vs;
}
@@ -307,11 +114,7 @@ draw_bind_vertex_shader(struct draw_context *draw,
tgsi_exec_machine_init(&draw->machine);
- /* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(&draw->machine,
- draw->vertex_shader->state->tokens,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/ );
+ dvs->prepare( dvs, draw );
}
@@ -319,9 +122,5 @@ void
draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
-#if defined(__i386__) || defined(__386__)
- x86_release_func( (struct x86_function *) &dvs->sse2_program );
-#endif
-
- FREE( dvs );
+ dvs->delete( dvs );
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
new file mode 100644
index 00000000000..4ee7e705e93
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <[email protected]>
+ */
+
+#ifndef DRAW_VS_H
+#define DRAW_VS_H
+
+/* Opaque here; a pointer is all these prototypes need. */
+struct draw_vertex_shader;
+struct draw_context;
+struct pipe_shader_state;
+
+/* Constructors for the three vertex shader back ends.
+ * draw_create_vertex_shader() tries them in the order llvm, sse, exec
+ * and uses the first one that returns non-NULL.
+ */
+
+/* Shader run by the TGSI interpreter.  Returns NULL only when the
+ * allocation fails.
+ */
+struct draw_vertex_shader *
+draw_create_vs_exec(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+/* Shader compiled with tgsi_emit_sse2().  Returns NULL when
+ * draw->use_sse is false, when allocation or code generation fails, or
+ * when the file is built without __i386__/__386__ defined.
+ */
+struct draw_vertex_shader *
+draw_create_vs_sse(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+/* Shader compiled through gallivm.  Returns NULL when allocation fails
+ * or when the file is built without MESA_LLVM defined.
+ */
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *templ);
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
new file mode 100644
index 00000000000..8588879400a
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -0,0 +1,186 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ * Brian Paul
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+
+
+/**
+ * Build the clip mask for one clip-space position.
+ *
+ * \param clip   clip-space position, four floats (x, y, z, w)
+ * \param plane  clip plane array; only entries [6, nr) are read here
+ * \param nr     total number of planes, counting the six hardwired ones
+ * \return mask with the CLIP_*_BIT of every failed hardwired plane, plus
+ *         bit i for every plane i >= 6 where dot4(clip, plane[i]) < 0
+ */
+static INLINE unsigned
+compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+   unsigned mask = 0;
+   unsigned i;
+
+   /* Do the hardwired planes first:
+    */
+   if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
+   if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
+   if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
+   if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
+   if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
+   if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
+
+   /* Followed by any remaining ones.  The shift is done on an unsigned
+    * one so that it never shifts into the sign bit of an int.
+    */
+   for (i = 6; i < nr; i++) {
+      if (dot4(clip, plane[i]) < 0)
+         mask |= (1u << i);
+   }
+
+   return mask;
+}
+
+
+/* 'prepare' hook for the interpreter back end.
+ *
+ * Binds this shader's tokens to draw->machine and then calls
+ * draw_update_vertex_fetch().  The hook is invoked from
+ * draw_bind_vertex_shader() and again from every
+ * draw_vertex_shader_queue_flush().
+ */
+static void
+vs_exec_prepare( struct draw_vertex_shader *shader,
+ struct draw_context *draw )
+{
+ /* specify the vertex program to interpret/execute */
+ tgsi_exec_machine_bind_shader(&draw->machine,
+ shader->state->tokens,
+ PIPE_MAX_SAMPLERS,
+ NULL /*samplers*/ );
+
+ draw_update_vertex_fetch( draw );
+}
+
+
+/**
+ * Shade a batch of up to four vertices with the TGSI interpreter and
+ * store the post-transform results.
+ *
+ * \param shader  the shader object (not referenced; the interpreter
+ *                state lives in draw->machine)
+ * \param draw    draw context supplying the machine, constants, viewport
+ *                and clip planes
+ * \param elts    indexes of the input vertices to fetch
+ * \param count   number of vertices to shade, at most 4
+ * \param vOut    array of pointers to the output vertices
+ */
+static void
+vs_exec_run( struct draw_vertex_shader *shader,
+             struct draw_context *draw,
+             const unsigned *elts,
+             unsigned count,
+             struct vertex_header *vOut[] )
+{
+   struct tgsi_exec_machine *machine = &draw->machine;
+   const float *scale = draw->viewport.scale;
+   const float *trans = draw->viewport.translate;
+   unsigned int vert;
+
+   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
+   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
+
+   assert(count <= 4);
+   assert(draw->vertex_shader->state->output_semantic_name[0]
+          == TGSI_SEMANTIC_POSITION);
+
+   /* Point the machine at the constants and at this call's local
+    * input/output arrays.
+    */
+   machine->Consts = (float (*)[4]) draw->user.constants;
+   machine->Inputs = ALIGN16_ASSIGN(inputs);
+   machine->Outputs = ALIGN16_ASSIGN(outputs);
+
+   draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+
+   /* run interpreter */
+   tgsi_exec_machine_run( machine );
+
+   /* Copy the machine outputs into the vertex headers, one vertex at a
+    * time.
+    */
+   for (vert = 0; vert < count; vert++) {
+      struct vertex_header *out = vOut[vert];
+      unsigned attr;
+      float x, y, z, w;
+
+      /* Output 0 is the position; keep the untransformed clip
+       * coordinates for clipping.
+       *
+       * XXX: Computing the clipmask should be done in the vertex
+       * program as a set of DP4 instructions appended to the
+       * user-provided code.
+       */
+      x = machine->Outputs[0].xyzw[0].f[vert];
+      y = machine->Outputs[0].xyzw[1].f[vert];
+      z = machine->Outputs[0].xyzw[2].f[vert];
+      w = machine->Outputs[0].xyzw[3].f[vert];
+
+      out->clip[0] = x;
+      out->clip[1] = y;
+      out->clip[2] = z;
+      out->clip[3] = w;
+
+      out->clipmask = compute_clipmask(out->clip, draw->plane, draw->nr_planes);
+      out->edgeflag = 1;
+
+      /* Perspective divide; from here on w holds 1/w. */
+      w = 1.0f / w;
+      x *= w;
+      y *= w;
+      z *= w;
+
+      /* Viewport mapping */
+      out->data[0][0] = x * scale[0] + trans[0];
+      out->data[0][1] = y * scale[1] + trans[1];
+      out->data[0][2] = z * scale[2] + trans[2];
+      out->data[0][3] = w;
+
+      /* The remaining outputs are copied unchanged into the following
+       * attribute slots.
+       */
+      for (attr = 1; attr < draw->num_vs_outputs; attr++) {
+         unsigned chan;
+
+         for (chan = 0; chan < 4; chan++)
+            out->data[attr][chan] = machine->Outputs[attr].xyzw[chan].f[vert];
+      }
+   }
+}
+
+
+
+/* 'delete' hook for the interpreter back end: the shader owns nothing
+ * besides its own struct, so freeing that is all there is to do.
+ */
+static void
+vs_exec_delete( struct draw_vertex_shader *dvs )
+{
+ FREE( dvs );
+}
+
+
+/**
+ * Create a vertex shader that is executed by the TGSI interpreter.
+ *
+ * \param draw   the draw context (not referenced here)
+ * \param state  shader state; the pointer is stored, not copied
+ * \return the new shader, or NULL if the allocation failed
+ */
+struct draw_vertex_shader *
+draw_create_vs_exec(struct draw_context *draw,
+                    const struct pipe_shader_state *state)
+{
+   struct draw_vertex_shader *exec_vs = CALLOC_STRUCT( draw_vertex_shader );
+
+   if (exec_vs != NULL) {
+      exec_vs->state = state;
+      exec_vs->prepare = vs_exec_prepare;
+      exec_vs->run = vs_exec_run;
+      exec_vs->delete = vs_exec_delete;
+   }
+
+   return exec_vs;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
new file mode 100644
index 00000000000..44022b6e077
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -0,0 +1,237 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin
+ * Keith Whitwell <[email protected]>
+ * Brian Paul
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#ifdef MESA_LLVM
+
+#include "llvm/gallivm.h"
+
+/* LLVM flavour of draw_vertex_shader.  'base' has to stay the first
+ * member: the hooks receive a draw_vertex_shader pointer and cast it
+ * back to this type.
+ */
+struct draw_llvm_vertex_shader {
+ struct draw_vertex_shader base;
+ /* Program returned by gallivm_ir_compile() in draw_create_vs_llvm(). */
+ struct gallivm_prog *llvm_prog;
+};
+
+
+/**
+ * Build the clip mask for one clip-space position.
+ *
+ * \param clip   clip-space position, four floats (x, y, z, w)
+ * \param plane  clip plane array; only entries [6, nr) are read here
+ * \param nr     total number of planes, counting the six hardwired ones
+ * \return mask with the CLIP_*_BIT of every failed hardwired plane, plus
+ *         bit i for every plane i >= 6 where dot4(clip, plane[i]) < 0
+ */
+static INLINE unsigned
+compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+   unsigned mask = 0;
+   unsigned i;
+
+   /* Do the hardwired planes first:
+    */
+   if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
+   if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
+   if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
+   if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
+   if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
+   if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
+
+   /* Followed by any remaining ones.  The shift is done on an unsigned
+    * one so that it never shifts into the sign bit of an int.
+    */
+   for (i = 6; i < nr; i++) {
+      if (dot4(clip, plane[i]) < 0)
+         mask |= (1u << i);
+   }
+
+   return mask;
+}
+
+
+
+/* 'prepare' hook for the LLVM back end.  Only calls
+ * draw_update_vertex_fetch(); 'base' is not used.
+ */
+static void
+vs_llvm_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ draw_update_vertex_fetch( draw );
+}
+
+
+
+/**
+ * Shade a batch of up to four vertices with the gallivm-compiled program
+ * and store the post-transform results.
+ *
+ * \param base   the shader; must have been created by
+ *               draw_create_vs_llvm()
+ * \param draw   draw context supplying the machine, constants, viewport
+ *               and clip planes
+ * \param elts   indexes of the input vertices to fetch
+ * \param count  number of vertices to shade, at most 4
+ * \param vOut   array of pointers to the output vertices
+ */
+static void
+vs_llvm_run( struct draw_vertex_shader *base,
+             struct draw_context *draw,
+             const unsigned *elts,
+             unsigned count,
+             struct vertex_header *vOut[] )
+{
+   struct draw_llvm_vertex_shader *llvm_vs =
+      (struct draw_llvm_vertex_shader *)base;
+   struct tgsi_exec_machine *machine = &draw->machine;
+   const float *scale = draw->viewport.scale;
+   const float *trans = draw->viewport.translate;
+   unsigned int vert;
+
+   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
+   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
+
+   assert(count <= 4);
+   assert(draw->vertex_shader->state->output_semantic_name[0]
+          == TGSI_SEMANTIC_POSITION);
+
+   /* Consts does not require 16 byte alignment. */
+   machine->Consts = (float (*)[4]) draw->user.constants;
+
+   machine->Inputs = ALIGN16_ASSIGN(inputs);
+   machine->Outputs = ALIGN16_ASSIGN(outputs);
+
+   draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+
+   /* run shader */
+   gallivm_cpu_vs_exec(llvm_vs->llvm_prog,
+                       machine->Inputs,
+                       machine->Outputs,
+                       machine->Consts,
+                       machine->Temps);
+
+   /* Copy the machine outputs into the vertex headers, one vertex at a
+    * time.
+    */
+   for (vert = 0; vert < count; vert++) {
+      struct vertex_header *out = vOut[vert];
+      unsigned attr;
+      float x, y, z, w;
+
+      /* Output 0 is the position; keep the untransformed clip
+       * coordinates for clipping.
+       */
+      x = machine->Outputs[0].xyzw[0].f[vert];
+      y = machine->Outputs[0].xyzw[1].f[vert];
+      z = machine->Outputs[0].xyzw[2].f[vert];
+      w = machine->Outputs[0].xyzw[3].f[vert];
+
+      out->clip[0] = x;
+      out->clip[1] = y;
+      out->clip[2] = z;
+      out->clip[3] = w;
+
+      out->clipmask = compute_clipmask(out->clip, draw->plane, draw->nr_planes);
+      out->edgeflag = 1;
+
+      /* Perspective divide; from here on w holds 1/w. */
+      w = 1.0f / w;
+      x *= w;
+      y *= w;
+      z *= w;
+
+      /* Viewport mapping */
+      out->data[0][0] = x * scale[0] + trans[0];
+      out->data[0][1] = y * scale[1] + trans[1];
+      out->data[0][2] = z * scale[2] + trans[2];
+      out->data[0][3] = w;
+
+      /* The remaining outputs are copied unchanged into the following
+       * attribute slots.
+       */
+      for (attr = 1; attr < draw->num_vs_outputs; attr++) {
+         unsigned chan;
+
+         for (chan = 0; chan < 4; chan++)
+            out->data[attr][chan] = machine->Outputs[attr].xyzw[chan].f[vert];
+      }
+   }
+}
+
+/* 'delete' hook for the LLVM back end.  Frees the wrapper struct only. */
+static void
+vs_llvm_delete( struct draw_vertex_shader *base )
+{
+ struct draw_llvm_vertex_shader *shader =
+ (struct draw_llvm_vertex_shader *)base;
+
+ /* TODO: shader->llvm_prog is not released here; something still has
+ * to free the compiled shader.
+ */
+
+ FREE( shader );
+}
+
+
+
+
+/**
+ * Create a vertex shader compiled through gallivm.
+ *
+ * The TGSI tokens of \p templ are translated to gallivm IR, compiled,
+ * and handed to the CPU engine.  As a side effect draw->engine is set
+ * to the global engine, or to a newly created one if none exists yet.
+ *
+ * \param draw   the draw context; draw->engine is written
+ * \param templ  shader state; the pointer is stored, not copied
+ * \return the new shader, or NULL if allocation, IR creation or
+ *         compilation failed (the caller then tries the next back end)
+ */
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+                    const struct pipe_shader_state *templ)
+{
+   struct draw_llvm_vertex_shader *vs;
+   struct gallivm_ir *ir;
+
+   vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
+   if (vs == NULL)
+      return NULL;
+
+   vs->base.state = templ;
+   vs->base.prepare = vs_llvm_prepare;
+   vs->base.run = vs_llvm_run;
+   vs->base.delete = vs_llvm_delete;
+
+   ir = gallivm_ir_new(GALLIVM_VS);
+   if (ir == NULL)
+      goto fail;
+
+   gallivm_ir_set_layout(ir, GALLIVM_SOA);
+   gallivm_ir_set_components(ir, 4);
+   gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens);
+   vs->llvm_prog = gallivm_ir_compile(ir);
+   gallivm_ir_delete(ir);
+
+   /* vs_llvm_run() passes llvm_prog straight to gallivm_cpu_vs_exec(),
+    * so a shader without a program must never be handed out.
+    */
+   if (vs->llvm_prog == NULL)
+      goto fail;
+
+   draw->engine = gallivm_global_cpu_engine();
+
+   /* XXX: Why are there two versions of this?  Shouldn't creating the
+    * engine be a separate operation to compiling a shader?
+    */
+   if (!draw->engine) {
+      draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
+   }
+   else {
+      gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
+   }
+
+   return &vs->base;
+
+fail:
+   FREE( vs );
+   return NULL;
+}
+
+
+
+
+
+#else
+
+/* Built without MESA_LLVM: no LLVM back end is available.  Always
+ * returns NULL, so draw_create_vertex_shader() goes on to the next
+ * back end.
+ */
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ return NULL;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
new file mode 100644
index 00000000000..04349cb404c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -0,0 +1,251 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ * Brian Paul
+ */
+
+#include "draw_vs.h"
+
+#if defined(__i386__) || defined(__386__)
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "x86/rtasm/x86sse.h"
+#include "tgsi/exec/tgsi_sse2.h"
+
+
+/* Entry point of a generated shader.  vs_sse_run() calls it with the
+ * machine's Inputs, Outputs, Consts and Temps, in that order.
+ */
+typedef void (XSTDCALL *codegen_function) (
+ const struct tgsi_exec_vector *input,
+ struct tgsi_exec_vector *output,
+ float (*constant)[4],
+ struct tgsi_exec_vector *temporary );
+
+
+/* SSE flavour of draw_vertex_shader.  'base' has to stay the first
+ * member: the hooks receive a draw_vertex_shader pointer and cast it
+ * back to this type.
+ */
+struct draw_sse_vertex_shader {
+ struct draw_vertex_shader base;
+ /* Code buffer filled by tgsi_emit_sse2(); released in vs_sse_delete(). */
+ struct x86_function sse2_program;
+ /* Result of x86_get_func() on sse2_program, cached at creation. */
+ codegen_function func;
+};
+
+
+/**
+ * Build the clip mask for one clip-space position.
+ *
+ * Should be part of the generated shader.
+ *
+ * \param clip   clip-space position, four floats (x, y, z, w)
+ * \param plane  clip plane array; only entries [6, nr) are read here
+ * \param nr     total number of planes, counting the six hardwired ones
+ * \return mask with the CLIP_*_BIT of every failed hardwired plane, plus
+ *         bit i for every plane i >= 6 where dot4(clip, plane[i]) < 0
+ */
+static INLINE unsigned
+compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+   unsigned mask = 0;
+   unsigned i;
+
+   /* Do the hardwired planes first:
+    */
+   if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
+   if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
+   if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
+   if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
+   if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
+   if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
+
+   /* Followed by any remaining ones.  The shift is done on an unsigned
+    * one so that it never shifts into the sign bit of an int.
+    */
+   for (i = 6; i < nr; i++) {
+      if (dot4(clip, plane[i]) < 0)
+         mask |= (1u << i);
+   }
+
+   return mask;
+}
+
+
+/* 'prepare' hook for the SSE back end.  Only calls
+ * draw_update_vertex_fetch(); 'base' is not used.
+ */
+static void
+vs_sse_prepare( struct draw_vertex_shader *base,
+ struct draw_context *draw )
+{
+ draw_update_vertex_fetch( draw );
+}
+
+/**
+ * Shade a batch of up to four vertices with the generated SSE2 code and
+ * store the post-transform results.
+ *
+ * \param base   the shader; must have been created by
+ *               draw_create_vs_sse()
+ * \param draw   draw context supplying the machine, constants, viewport
+ *               and clip planes
+ * \param elts   indexes of the input vertices to fetch
+ * \param count  number of vertices to shade, at most 4
+ * \param vOut   array of pointers to the output vertices
+ */
+static void
+vs_sse_run( struct draw_vertex_shader *base,
+            struct draw_context *draw,
+            const unsigned *elts,
+            unsigned count,
+            struct vertex_header *vOut[] )
+{
+   struct draw_sse_vertex_shader *sse_vs = (struct draw_sse_vertex_shader *)base;
+   struct tgsi_exec_machine *machine = &draw->machine;
+   const float *scale = draw->viewport.scale;
+   const float *trans = draw->viewport.translate;
+   unsigned int vert;
+
+   ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
+   ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
+
+   assert(count <= 4);
+   assert(draw->vertex_shader->state->output_semantic_name[0]
+          == TGSI_SEMANTIC_POSITION);
+
+   /* Consts does not require 16 byte alignment. */
+   machine->Consts = (float (*)[4]) draw->user.constants;
+   machine->Inputs = ALIGN16_ASSIGN(inputs);
+   machine->Outputs = ALIGN16_ASSIGN(outputs);
+
+   /* Fetch vertices.  This may at some point be integrated into the
+    * compiled shader -- that would require a reorganization where
+    * multiple versions of the compiled shader might exist,
+    * specialized for each fetch state.
+    */
+   draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+
+   /* run compiled shader
+    */
+   sse_vs->func( machine->Inputs,
+                 machine->Outputs,
+                 machine->Consts,
+                 machine->Temps );
+
+   /* XXX: Computing the clipmask and emitting results should be done
+    * in the vertex program as a set of instructions appended to
+    * the user-provided code.
+    */
+   for (vert = 0; vert < count; vert++) {
+      struct vertex_header *out = vOut[vert];
+      unsigned attr;
+      float x, y, z, w;
+
+      /* Output 0 is the position; keep the untransformed clip
+       * coordinates for clipping.
+       */
+      x = machine->Outputs[0].xyzw[0].f[vert];
+      y = machine->Outputs[0].xyzw[1].f[vert];
+      z = machine->Outputs[0].xyzw[2].f[vert];
+      w = machine->Outputs[0].xyzw[3].f[vert];
+
+      out->clip[0] = x;
+      out->clip[1] = y;
+      out->clip[2] = z;
+      out->clip[3] = w;
+
+      out->clipmask = compute_clipmask(out->clip, draw->plane, draw->nr_planes);
+      out->edgeflag = 1;
+
+      /* Perspective divide; from here on w holds 1/w. */
+      w = 1.0f / w;
+      x *= w;
+      y *= w;
+      z *= w;
+
+      /* Viewport mapping */
+      out->data[0][0] = x * scale[0] + trans[0];
+      out->data[0][1] = y * scale[1] + trans[1];
+      out->data[0][2] = z * scale[2] + trans[2];
+      out->data[0][3] = w;
+
+      /* The remaining outputs are copied unchanged into the following
+       * attribute slots.
+       */
+      for (attr = 1; attr < draw->num_vs_outputs; attr++) {
+         unsigned chan;
+
+         for (chan = 0; chan < 4; chan++)
+            out->data[attr][chan] = machine->Outputs[attr].xyzw[chan].f[vert];
+      }
+   }
+}
+
+
+
+/* 'delete' hook for the SSE back end: releases the generated code
+ * first, then the struct that holds it.
+ */
+static void
+vs_sse_delete( struct draw_vertex_shader *base )
+{
+ struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
+
+ x86_release_func( &shader->sse2_program );
+
+ FREE( shader );
+}
+
+
+/**
+ * Create a vertex shader compiled to SSE2 with tgsi_emit_sse2().
+ *
+ * \param draw   the draw context; draw->use_sse gates this back end
+ * \param templ  shader state; the pointer is stored, not copied
+ * \return the new shader, or NULL if SSE is disabled, the allocation
+ *         failed, code generation failed, or no entry point could be
+ *         obtained (the caller then falls back to the interpreter)
+ */
+struct draw_vertex_shader *
+draw_create_vs_sse(struct draw_context *draw,
+                   const struct pipe_shader_state *templ)
+{
+   struct draw_sse_vertex_shader *vs;
+
+   if (!draw->use_sse)
+      return NULL;
+
+   vs = CALLOC_STRUCT( draw_sse_vertex_shader );
+   if (vs == NULL)
+      return NULL;
+
+   vs->base.state = templ;
+   vs->base.prepare = vs_sse_prepare;
+   vs->base.run = vs_sse_run;
+   vs->base.delete = vs_sse_delete;
+
+   x86_init_func( &vs->sse2_program );
+
+   if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens,
+                        &vs->sse2_program )) {
+      fprintf(stderr, "tgsi_emit_sse2() failed, falling back to interpreter\n");
+      goto fail;
+   }
+
+   /* vs_sse_run() calls func unconditionally, so a shader without an
+    * entry point must never be handed out.
+    */
+   vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
+   if (vs->func == NULL) {
+      fprintf(stderr, "x86_get_func() failed, falling back to interpreter\n");
+      goto fail;
+   }
+
+   return &vs->base;
+
+fail:
+   x86_release_func( &vs->sse2_program );
+
+   FREE(vs);
+   return NULL;
+}
+
+
+
+#else
+
+/* Built without __i386__/__386__: no SSE back end is available.  Always
+ * returns NULL, so draw_create_vertex_shader() goes on to the
+ * interpreter.
+ */
+struct draw_vertex_shader *
+draw_create_vs_sse( struct draw_context *draw,
+ const struct pipe_shader_state *templ )
+{
+ return NULL;
+}
+
+
+#endif
+
diff --git a/src/mesa/sources b/src/mesa/sources
index cecd8a830fe..f83d247a1e2 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -159,6 +159,9 @@ VF_SOURCES = \
DRAW_SOURCES = \
$(TOP)/src/gallium/auxiliary/draw/draw_clip.c \
+ $(TOP)/src/gallium/auxiliary/draw/draw_vs_exec.c \
+ $(TOP)/src/gallium/auxiliary/draw/draw_vs_sse.c \
+ $(TOP)/src/gallium/auxiliary/draw/draw_vs_llvm.c \
$(TOP)/src/gallium/auxiliary/draw/draw_context.c\
$(TOP)/src/gallium/auxiliary/draw/draw_cull.c \
$(TOP)/src/gallium/auxiliary/draw/draw_debug.c \