summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r--src/gallium/auxiliary/draw/Makefile1
-rw-r--r--src/gallium/auxiliary/draw/SConscript8
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c26
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h13
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c20
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h21
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c12
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h9
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c48
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c55
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c86
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h9
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c127
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h9
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c60
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h28
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c411
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.h58
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos_io.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos_machine.c297
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c49
23 files changed, 983 insertions, 405 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 9a88ecc0708..f2e36a89e90 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -37,6 +37,7 @@ C_SOURCES = \
draw_vs_varient.c \
draw_vs_aos.c \
draw_vs_aos_io.c \
+ draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 3b5d5ed4921..544a04918b6 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -15,7 +15,7 @@ draw = env.ConvenienceLibrary(
'draw_pipe_stipple.c',
'draw_pipe_twoside.c',
'draw_pipe_unfilled.c',
- 'draw_pipe_util.c',
+ 'draw_pipe_util.c',
'draw_pipe_validate.c',
'draw_pipe_vbuf.c',
'draw_pipe_wide_line.c',
@@ -25,15 +25,21 @@ draw = env.ConvenienceLibrary(
'draw_pt_emit.c',
'draw_pt_fetch.c',
'draw_pt_fetch_emit.c',
+ 'draw_pt_fetch_shade_emit.c',
'draw_pt_fetch_shade_pipeline.c',
'draw_pt_post_vs.c',
+ 'draw_pt_util.c',
'draw_pt_varray.c',
'draw_pt_vcache.c',
'draw_vertex.c',
'draw_vs.c',
+ 'draw_vs_aos.c',
+ 'draw_vs_aos_io.c',
+ 'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
'draw_vs_sse.c',
+ 'draw_vs_varient.c'
])
auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 22420749656..2f263cf06a9 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -174,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw,
viewport->translate[1] == 0.0f &&
viewport->translate[2] == 0.0f &&
viewport->translate[3] == 0.0f);
+
+ draw_vs_set_viewport( draw, viewport );
}
@@ -215,9 +217,11 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw,
void
draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer)
+ const void *buffer,
+ unsigned size )
{
draw->pt.user.constants = buffer;
+ draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
}
@@ -345,14 +349,30 @@ void draw_set_edgeflags( struct draw_context *draw,
* \param elements the element buffer ptr
*/
void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements )
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+ unsigned eltSize,
+ unsigned min_index,
+ unsigned max_index,
+ void *elements )
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
+ draw->pt.user.min_index = min_index;
+ draw->pt.user.max_index = max_index;
}
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize,
+ void *elements )
+{
+ draw->pt.user.elts = elements;
+ draw->pt.user.eltSize = eltSize;
+ draw->pt.user.min_index = 0;
+ draw->pt.user.max_index = 0xffffffff;
+}
+
/* Revamp me please:
*/
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index c5c3d3b09e0..b8f2bfa3327 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -118,14 +118,23 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
+void
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+ unsigned eltSize,
+ unsigned min_index,
+ unsigned max_index,
+ void *elements );
+
void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements );
+ unsigned eltSize,
+ void *elements );
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
void draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer);
+ const void *buffer,
+ unsigned size );
void draw_set_edgeflags( struct draw_context *draw,
const unsigned *edgeflag );
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 4673d5dcbae..9522b79582e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -175,6 +175,22 @@ reset_stipple_counter(struct draw_stage *stage)
stage->next->reset_stipple_counter( stage->next );
}
+static void
+stipple_reset_point(struct draw_stage *stage, struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ stipple->counter = 0;
+ stage->next->point(stage->next, header);
+}
+
+static void
+stipple_reset_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ stipple->counter = 0;
+ stage->next->tri(stage->next, header);
+}
+
static void
stipple_first_line(struct draw_stage *stage,
@@ -220,9 +236,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.draw = draw;
stipple->stage.next = NULL;
- stipple->stage.point = draw_pipe_passthrough_point;
+ stipple->stage.point = stipple_reset_point;
stipple->stage.line = stipple_first_line;
- stipple->stage.tri = draw_pipe_passthrough_tri;
+ stipple->stage.tri = stipple_reset_tri;
stipple->stage.reset_stipple_counter = reset_stipple_counter;
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 67b9a9503d2..d514e28b77f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -145,7 +145,7 @@ emit_vertex( struct vbuf_stage *vbuf,
vertex->vertex_id = vbuf->nr_vertices++;
}
- return vertex->vertex_id;
+ return (ushort)vertex->vertex_id;
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index c095bf3d7b9..4f8cceee1e8 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -147,6 +147,8 @@ struct draw_context
const void *elts;
/** bytes per index (0, 1, 2 or 4) */
unsigned eltSize;
+ unsigned min_index;
+ unsigned max_index;
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
@@ -155,7 +157,8 @@ struct draw_context
const void *constants;
} user;
- boolean test_fse;
+ boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
+ boolean no_fse; /* disable FSE even when it is correct */
} pt;
struct {
@@ -183,6 +186,16 @@ struct draw_context
*/
struct gallivm_cpu_engine *engine;
+ /* Here's another one:
+ */
+ struct aos_machine *aos_machine;
+
+
+ const float (*aligned_constants)[4];
+
+ const float (*aligned_constant_storage)[4];
+ unsigned const_storage_size;
+
struct translate *fetch;
struct translate_cache *fetch_cache;
@@ -215,6 +228,12 @@ struct draw_context
boolean draw_vs_init( struct draw_context *draw );
void draw_vs_destroy( struct draw_context *draw );
+void draw_vs_set_viewport( struct draw_context *,
+ const struct pipe_viewport_state * );
+
+void draw_vs_set_constants( struct draw_context *,
+ const float (*constants)[4],
+ unsigned size );
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 75f44d503eb..723077159b5 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw,
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
- else if (opt == PT_SHADE && draw->pt.test_fse)
+ else if (opt == PT_SHADE && !draw->pt.no_fse)
middle = draw->pt.middle.fetch_shade_emit;
else
middle = draw->pt.middle.general;
@@ -105,6 +105,7 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
draw->pt.test_fse = GETENV("DRAW_FSE") != NULL;
+ draw->pt.no_fse = GETENV("DRAW_NO_FSE") != NULL;
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
@@ -118,12 +119,9 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
- if (draw->pt.test_fse) {
- draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
- if (!draw->pt.middle.fetch_shade_emit)
- return FALSE;
- }
-
+ draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
+ if (!draw->pt.middle.fetch_shade_emit)
+ return FALSE;
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index e03816ebbc7..6b8ba1d171b 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -96,6 +96,15 @@ struct draw_pt_middle_end {
unsigned start,
unsigned count);
+ /* Transform all vertices in a linear range and then draw them with
+ * the supplied element list.
+ */
+ void (*run_linear_elts)( struct draw_pt_middle_end *,
+ unsigned fetch_start,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count );
+
void (*finish)( struct draw_pt_middle_end * );
void (*destroy)( struct draw_pt_middle_end * );
};
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index a1d041a74f5..09bdc5fb5e1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -311,6 +311,53 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
}
+static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+ struct draw_context *draw = feme->draw;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ hw_verts = draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)count );
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+ /* Single routine to fetch vertices and emit HW verts.
+ */
+ feme->translate->run( feme->translate,
+ start,
+ count,
+ hw_verts );
+
+ /* XXX: Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
+ /* Done -- that was easy, wasn't it:
+ */
+ draw->render->release_vertices( draw->render,
+ hw_verts,
+ feme->translate->key.output_stride,
+ count );
+
+}
+
+
+
static void fetch_emit_finish( struct draw_pt_middle_end *middle )
{
@@ -343,6 +390,7 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
fetch_emit->base.prepare = fetch_emit_prepare;
fetch_emit->base.run = fetch_emit_run;
fetch_emit->base.run_linear = fetch_emit_run_linear;
+ fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts;
fetch_emit->base.finish = fetch_emit_finish;
fetch_emit->base.destroy = fetch_emit_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 729c7db9999..efa6dddbda8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -189,12 +189,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
draw->pt.vertex_buffer[buf].pitch );
}
- fse->active->set_constants( fse->active,
- (const float (*)[4])draw->pt.user.constants );
-
- fse->active->set_viewport( fse->active,
- &draw->viewport );
-
//return TRUE;
}
@@ -316,6 +310,54 @@ fse_run(struct draw_pt_middle_end *middle,
}
+
+static void fse_run_linear_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ unsigned alloc_count = align(count, 4);
+ char *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ hw_verts = draw->render->allocate_vertices( draw->render,
+ (ushort)fse->key.output_stride,
+ (ushort)alloc_count );
+
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+ /* Single routine to fetch vertices, run shader and emit HW verts.
+ * Clipping is done elsewhere -- either by the API or on hardware,
+ * or for some other reason not required...
+ */
+ fse->active->run_linear( fse->active,
+ start, count,
+ hw_verts );
+
+
+ draw->render->draw( draw->render,
+ draw_elts,
+ draw_count );
+
+
+
+ draw->render->release_vertices( draw->render,
+ hw_verts,
+ fse->key.output_stride,
+ count );
+}
+
+
+
static void fse_finish( struct draw_pt_middle_end *middle )
{
}
@@ -336,6 +378,7 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
fse->base.prepare = fse_prepare;
fse->base.run = fse_run;
fse->base.run_linear = fse_run_linear;
+ fse->base.run_linear_elts = fse_run_linear_elts;
fse->base.finish = fse_finish;
fse->base.destroy = fse_destroy;
fse->draw = draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 06718779a5b..eb6988ff03b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -81,9 +81,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- draw->bypass_clipping,
- draw->identity_viewport,
- draw->rasterizer->gl_rasterization_rules );
+ (boolean)draw->bypass_clipping,
+ (boolean)draw->identity_viewport,
+ (boolean)draw->rasterizer->gl_rasterization_rules );
if (!(opt & PT_PIPELINE))
@@ -98,7 +98,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
-
static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
@@ -251,6 +250,84 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
+static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align_int( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run_linear( fpme->fetch,
+ start,
+ count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie a bypass shader,
+ * then the inputs == outputs, and are already in the correct
+ * place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+ FREE(pipeline_verts);
+}
+
+
+
static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
{
/* nothing to do */
@@ -282,6 +359,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
fpme->base.prepare = fetch_pipeline_prepare;
fpme->base.run = fetch_pipeline_run;
fpme->base.run_linear = fetch_pipeline_linear_run;
+ fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts;
fpme->base.finish = fetch_pipeline_finish;
fpme->base.destroy = fetch_pipeline_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 6979f6b544f..7c722457c3c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -10,7 +10,8 @@ static void FUNC(struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
- unsigned i, j, flags;
+ unsigned i, j;
+ ushort flags;
unsigned first, incr;
varray->fetch_start = start;
@@ -200,9 +201,9 @@ static void FUNC(struct draw_pt_front_end *frontend,
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
- const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
for (j = 0; j + first <= count; j += i) {
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 96e02fbf3a9..9ffc99abf97 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -36,8 +36,8 @@
#include "draw/draw_pt.h"
-#define CACHE_MAX 32
-#define FETCH_MAX 128
+#define CACHE_MAX 1024
+#define FETCH_MAX 4096
#define DRAW_MAX (16*1024)
struct vcache_frontend {
@@ -201,7 +201,128 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
#define FUNC vcache_run
#include "draw_pt_vcache_tmp.h"
+static void translate_uint_elts( const unsigned *src,
+ unsigned count,
+ int delta,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i] + delta);
+}
+
+static void translate_ushort_elts( const ushort *src,
+ unsigned count,
+ int delta,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i] + delta);
+}
+static void translate_ubyte_elts( const ubyte *src,
+ unsigned count,
+ int delta,
+ ushort *dest )
+{
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ dest[i] = (ushort)(src[i] + delta);
+}
+
+#if 0
+static enum pipe_format format_from_get_elt( pt_elt_func get_elt )
+{
+ switch (draw->pt.user.eltSize) {
+ case 1: return PIPE_FORMAT_R8_UNORM;
+ case 2: return PIPE_FORMAT_R16_UNORM;
+ case 4: return PIPE_FORMAT_R32_UNORM;
+ default: return PIPE_FORMAT_NONE;
+ }
+}
+#endif
+
+static void vcache_check_run( struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned draw_count )
+{
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+ struct draw_context *draw = vcache->draw;
+ unsigned min_index = draw->pt.user.min_index;
+ unsigned max_index = draw->pt.user.max_index;
+ unsigned index_size = draw->pt.user.eltSize;
+ unsigned fetch_count = max_index + 1 - min_index;
+ const ushort *transformed_elts;
+ ushort *storage = NULL;
+
+
+ if (0) debug_printf("fetch_count %d draw_count %d\n", fetch_count, draw_count);
+
+ if (max_index == 0xffffffff ||
+ fetch_count >= FETCH_MAX ||
+ fetch_count > draw_count) {
+ if (0) debug_printf("fail\n");
+ goto fail;
+ }
+
+
+ if (min_index == 0 &&
+ index_size == 2)
+ {
+ transformed_elts = (const ushort *)elts;
+ }
+ else
+ {
+ storage = MALLOC( draw_count * sizeof(ushort) );
+ if (!storage)
+ goto fail;
+
+ switch(index_size) {
+ case 1:
+ translate_ubyte_elts( (const ubyte *)elts,
+ draw_count,
+ 0 - (int)min_index,
+ storage );
+ break;
+
+ case 2:
+ translate_ushort_elts( (const ushort *)elts,
+ draw_count,
+ 0 - (int)min_index,
+ storage );
+ break;
+
+ case 4:
+ translate_uint_elts( (const uint *)elts,
+ draw_count,
+ 0 - (int)min_index,
+ storage );
+ break;
+
+ default:
+ assert(0);
+ return;
+ }
+ transformed_elts = storage;
+ }
+
+ vcache->middle->run_linear_elts( vcache->middle,
+ min_index, /* start */
+ fetch_count,
+ transformed_elts,
+ draw_count );
+
+ FREE(storage);
+ return;
+
+ fail:
+ vcache_run( frontend, get_elt, elts, draw_count );
+}
@@ -219,7 +340,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
}
else
{
- vcache->base.run = vcache_run;
+ vcache->base.run = vcache_check_run;
}
vcache->input_prim = prim;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index cf9f394aa3b..5c99a47e49a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -10,7 +10,8 @@ static void FUNC( struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
- unsigned i, flags;
+ unsigned i;
+ ushort flags;
switch (vcache->input_prim) {
@@ -138,9 +139,9 @@ static void FUNC( struct draw_pt_front_end *frontend,
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
- const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 9b899d404e1..ce35112fc10 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -41,6 +41,36 @@
+
+void draw_vs_set_constants( struct draw_context *draw,
+ const float (*constants)[4],
+ unsigned size )
+{
+ if (((unsigned)constants) & 0xf) {
+ if (size > draw->vs.const_storage_size) {
+ if (draw->vs.aligned_constant_storage)
+ align_free(draw->vs.aligned_constant_storage);
+ draw->vs.aligned_constant_storage = align_malloc( size, 16 );
+ }
+ memcpy( draw->vs.aligned_constant_storage,
+ constants,
+ size );
+ constants = draw->vs.aligned_constant_storage;
+ }
+
+ draw->vs.aligned_constants = constants;
+ draw_vs_aos_machine_constants( draw->vs.aos_machine, constants );
+}
+
+
+void draw_vs_set_viewport( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport )
+{
+ draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport );
+}
+
+
+
struct draw_vertex_shader *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
@@ -83,6 +113,13 @@ void
draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
+ unsigned i;
+
+ for (i = 0; i < dvs->nr_varients; i++)
+ dvs->varient[i]->destroy( dvs->varient[i] );
+
+ dvs->nr_varients = 0;
+
dvs->delete( dvs );
}
@@ -110,6 +147,10 @@ draw_vs_init( struct draw_context *draw )
draw->vs.fetch_cache = translate_cache_create();
if (!draw->vs.fetch_cache)
return FALSE;
+
+ draw->vs.aos_machine = draw_vs_aos_machine();
+ if (!draw->vs.aos_machine)
+ return FALSE;
return TRUE;
}
@@ -129,6 +170,12 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.emit_cache)
translate_cache_destroy(draw->vs.emit_cache);
+ if (draw->vs.aos_machine)
+ draw_vs_aos_machine_destroy(draw->vs.aos_machine);
+
+ if (draw->vs.aligned_constant_storage)
+ align_free(draw->vs.aligned_constant_storage);
+
tgsi_exec_machine_free_data(&draw->vs.machine);
}
@@ -153,10 +200,17 @@ draw_vs_lookup_varient( struct draw_vertex_shader *vs,
if (varient == NULL)
return NULL;
- /* Add it to our list:
+ /* Add it to our list, could be smarter:
*/
- assert(vs->nr_varients < Elements(vs->varient));
- vs->varient[vs->nr_varients++] = varient;
+ if (vs->nr_varients < Elements(vs->varient)) {
+ vs->varient[vs->nr_varients++] = varient;
+ }
+ else {
+ vs->last_varient++;
+ vs->last_varient %= Elements(vs->varient);
+ vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]);
+ vs->varient[vs->last_varient] = varient;
+ }
/* Done
*/
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 01171bc23d5..08c6de8ba86 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -70,16 +70,6 @@ struct draw_vs_varient_key {
struct draw_vs_varient;
-typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *,
- const unsigned *elts,
- unsigned count,
- void *output_buffer);
-
-typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *,
- unsigned start,
- unsigned count,
- void *output_buffer);
-
struct draw_vs_varient {
struct draw_vs_varient_key key;
@@ -91,12 +81,6 @@ struct draw_vs_varient {
const void *ptr,
unsigned stride );
- void (*set_constants)( struct draw_vs_varient *,
- const float (*constants)[4] );
-
- void (*set_viewport)( struct draw_vs_varient *,
- const struct pipe_viewport_state * );
-
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
unsigned count,
@@ -123,10 +107,15 @@ struct draw_vertex_shader {
struct tgsi_shader_info info;
+ /* Extracted from shader:
+ */
+ const float (*immediates)[4];
+
/*
*/
struct draw_vs_varient *varient[16];
unsigned nr_varients;
+ unsigned last_varient;
struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
const struct draw_vs_varient_key *key );
@@ -213,7 +202,14 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key
}
+struct aos_machine *draw_vs_aos_machine( void );
+void draw_vs_aos_machine_destroy( struct aos_machine *machine );
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] );
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport );
#define MAX_TGSI_VERTICES 4
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index d3770b2c53b..725f36b5025 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a,
a.disp == b.disp);
}
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned value )
+{
+ if (cp->ebp != value) {
+ unsigned offset;
+
+ switch (value) {
+ case X86_IMMEDIATES:
+ offset = Offset(struct aos_machine, immediates);
+ break;
+ case X86_CONSTANTS:
+ offset = Offset(struct aos_machine, constants);
+ break;
+ case X86_ATTRIBS:
+ offset = Offset(struct aos_machine, attrib);
+ break;
+ default:
+ assert(0);
+ offset = 0;
+ }
+
+ x86_mov(cp->func, cp->temp_EBP,
+ x86_make_disp(cp->machine_EDX, offset));
+ /* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */
+
+ cp->ebp = value;
+ }
+
+ return cp->temp_EBP;
+}
+
static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
unsigned file,
@@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
case TGSI_FILE_TEMPORARY:
return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
- case TGSI_FILE_IMMEDIATE:
- return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
-
- case TGSI_FILE_CONSTANT:
- return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
-
case AOS_FILE_INTERNAL:
return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
+ case TGSI_FILE_IMMEDIATE:
+ return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float));
+
+ case TGSI_FILE_CONSTANT:
+ return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float));
+
default:
ERROR(cp, "unknown reg file");
return x86_make_reg(0,0);
@@ -118,70 +149,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
#define X87_CW_ROUND_MASK (3<<10)
#define X87_CW_INFINITY (1<<12)
-static void do_populate_lut( struct shine_tab *tab,
- float unclamped_exponent )
-{
- const float epsilon = 1.0F / 256.0F;
- float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
- unsigned i;
- tab->exponent = unclamped_exponent; /* for later comparison */
-
- tab->values[0] = 0;
- if (exponent == 0) {
- for (i = 1; i < 258; i++) {
- tab->values[i] = 1.0;
- }
- }
- else {
- for (i = 1; i < 258; i++) {
- tab->values[i] = powf((float)i * epsilon, exponent);
- }
- }
-}
-
-static void init_internals( struct aos_machine *machine )
-{
- unsigned i;
- float inv = 1.0f/255.0f;
- float f255 = 255.0f;
-
- ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
- *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
-
- ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
- ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
- ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
- ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
- ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
- ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
-
-
- machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
- X87_CW_EXCEPTION_DENORM_OP |
- X87_CW_EXCEPTION_ZERO_DIVIDE |
- X87_CW_EXCEPTION_OVERFLOW |
- X87_CW_EXCEPTION_UNDERFLOW |
- X87_CW_EXCEPTION_PRECISION |
- (1<<6) |
- X87_CW_ROUND_NEAREST |
- X87_CW_PRECISION_DOUBLE_EXT);
-
- assert(machine->fpu_rnd_nearest == 0x37f);
-
- machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
- X87_CW_EXCEPTION_DENORM_OP |
- X87_CW_EXCEPTION_ZERO_DIVIDE |
- X87_CW_EXCEPTION_OVERFLOW |
- X87_CW_EXCEPTION_UNDERFLOW |
- X87_CW_EXCEPTION_PRECISION |
- (1<<6) |
- X87_CW_ROUND_DOWN |
- X87_CW_PRECISION_DOUBLE_EXT);
-
- for (i = 0; i < MAX_SHINE_TAB; i++)
- do_populate_lut( &machine->shine_tab[i], 1.0f );
-}
static void spill( struct aos_compilation *cp, unsigned idx )
@@ -1189,136 +1157,6 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-static PIPE_CDECL void do_lit( struct aos_machine *machine,
- float *result,
- const float *in,
- unsigned count )
-{
- if (in[0] > 0)
- {
- if (in[1] <= 0.0)
- {
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = 1.0;
- result[3] = 1.0F;
- }
- else
- {
- const float epsilon = 1.0F / 256.0F;
- float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = powf(in[1], exponent);
- result[3] = 1.0;
- }
- }
- else
- {
- result[0] = 1.0F;
- result[1] = 0.0;
- result[2] = 0.0;
- result[3] = 1.0F;
- }
-}
-
-
-static PIPE_CDECL void do_lit_lut( struct aos_machine *machine,
- float *result,
- const float *in,
- unsigned count )
-{
- if (in[0] > 0)
- {
- if (in[1] <= 0.0)
- {
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = 1.0;
- result[3] = 1.0F;
- return;
- }
-
- if (machine->lit_info[count].shine_tab->exponent != in[3]) {
- machine->lit_info[count].func = do_lit;
- goto no_luck;
- }
-
- if (in[1] <= 1.0)
- {
- const float *tab = machine->lit_info[count].shine_tab->values;
- float f = in[1] * 256;
- int k = (int)f;
- float frac = f - (float)k;
-
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
- result[3] = 1.0;
- return;
- }
-
- no_luck:
- {
- const float epsilon = 1.0F / 256.0F;
- float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = powf(in[1], exponent);
- result[3] = 1.0;
- }
- }
- else
- {
- result[0] = 1.0F;
- result[1] = 0.0;
- result[2] = 0.0;
- result[3] = 1.0F;
- }
-}
-
-
-
-static void PIPE_CDECL populate_lut( struct aos_machine *machine,
- float *result,
- const float *in,
- unsigned count )
-{
- unsigned i, tab;
-
- /* Search for an existing table for this value. Note that without
- * static analysis we don't really know if in[3] will be constant,
- * but it usually is...
- */
- for (tab = 0; tab < 4; tab++) {
- if (machine->shine_tab[tab].exponent == in[3]) {
- goto found;
- }
- }
-
- for (tab = 0, i = 1; i < 4; i++) {
- if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
- tab = i;
- }
-
- if (machine->shine_tab[tab].last_used == machine->now) {
- /* No unused tables (this is not a ffvertex program...). Just
- * call pow each time:
- */
- machine->lit_info[count].func = do_lit;
- machine->lit_info[count].func( machine, result, in, count );
- return;
- }
- else {
- do_populate_lut( &machine->shine_tab[tab], in[3] );
- }
-
- found:
- machine->shine_tab[tab].last_used = machine->now;
- machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
- machine->lit_info[count].func = do_lit_lut;
- machine->lit_info[count].func( machine, result, in, count );
-}
@@ -1382,7 +1220,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
Offset(struct lit_info, func)));
}
else {
- x86_mov_reg_imm( cp->func, ecx, (int)do_lit );
+ x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit );
}
x86_call( cp->func, ecx );
@@ -1403,7 +1241,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-
+#if 0
static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
@@ -1464,6 +1302,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
return TRUE;
}
+#endif
@@ -1533,11 +1372,20 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
+/* A wrapper for powf().
+ * Makes sure it is cdecl and operates on floats.
+ */
+static float PIPE_CDECL _powerf( float x, float y )
+{
+ return powf( x, y );
+}
+
/* Really not sufficient -- need to check for conditions that could
* generate inf/nan values, which will slow things down hugely.
*/
static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
+#if 0
x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */
x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */
x87_fyl2x(cp->func); /* a1*log2(a0) */
@@ -1545,6 +1393,42 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+#else
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) );
+
+ x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) );
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+#endif
return TRUE;
}
@@ -1865,6 +1749,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp )
}
+#if 0
static boolean note_immediate( struct aos_compilation *cp,
struct tgsi_full_immediate *imm )
{
@@ -1877,6 +1762,7 @@ static boolean note_immediate( struct aos_compilation *cp,
return TRUE;
}
+#endif
@@ -1912,7 +1798,7 @@ static void find_last_write_outputs( struct aos_compilation *cp )
}
-#define ARG_VARIENT 1
+#define ARG_MACHINE 1
#define ARG_START_ELTS 2
#define ARG_COUNT 3
#define ARG_OUTBUF 4
@@ -1939,6 +1825,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
+ cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);
+ cp.stack_ESP = x86_make_reg( file_REG32, reg_SP );
x86_init_func(cp.func);
@@ -1946,11 +1834,12 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_push(cp.func, cp.idx_EBX);
x86_push(cp.func, cp.count_ESI);
+ x86_push(cp.func, cp.temp_EBP);
/* Load arguments into regs:
*/
- x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT));
+ x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE));
x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
@@ -1962,11 +1851,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
fixup = x86_jcc_forward(cp.func, cc_E);
- /* Dig out the machine pointer from inside the varient arg
- */
- x86_mov(cp.func, cp.machine_EDX,
- x86_make_disp(cp.machine_EDX,
- Offset( struct draw_vs_varient_aos_sse, machine )));
save_fpu_state( &cp );
set_fpu_round_nearest( &cp );
@@ -1988,8 +1872,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
+#if 0
if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
goto fail;
+#endif
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
@@ -2072,6 +1958,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
if (cp.func->need_emms)
mmx_emms(cp.func);
+ x86_pop(cp.func, cp.temp_EBP);
x86_pop(cp.func, cp.count_ESI);
x86_pop(cp.func, cp.idx_EBX);
@@ -2098,93 +1985,65 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
for (i = 0; i < vaos->base.key.nr_inputs; i++) {
if (vaos->base.key.element[i].in.buffer == buf) {
- vaos->machine->attrib[i].input_ptr = ((char *)ptr +
- vaos->base.key.element[i].in.offset);
- vaos->machine->attrib[i].input_stride = stride;
+ vaos->attrib[i].input_ptr = ((char *)ptr +
+ vaos->base.key.element[i].in.offset);
+ vaos->attrib[i].input_stride = stride;
}
}
}
-static void vaos_destroy( struct draw_vs_varient *varient )
-{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
- if (vaos->machine)
- align_free( vaos->machine );
-
- x86_release_func( &vaos->func[0] );
- x86_release_func( &vaos->func[1] );
- FREE(vaos);
-}
-
-static void vaos_run_elts( struct draw_vs_varient *varient,
- const unsigned *elts,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct aos_machine *machine = vaos->draw->vs.aos_machine;
+
+ machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ machine->constants = vaos->draw->vs.aligned_constants;
+ machine->immediates = vaos->base.vs->immediates;
+ machine->attrib = vaos->attrib;
- vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
- vaos->gen_run_elts( varient,
+ vaos->gen_run_elts( machine,
elts,
count,
output_buffer );
}
-static void vaos_run_linear( struct draw_vs_varient *varient,
- unsigned start,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct aos_machine *machine = vaos->draw->vs.aos_machine;
- vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
- vaos->gen_run_linear( varient,
+ machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ machine->constants = vaos->draw->vs.aligned_constants;
+ machine->immediates = vaos->base.vs->immediates;
+ machine->attrib = vaos->attrib;
+
+ vaos->gen_run_linear( machine,
start,
count,
output_buffer );
}
-static void vaos_set_constants( struct draw_vs_varient *varient,
- const float (*constants)[4] )
+
+static void vaos_destroy( struct draw_vs_varient *varient )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
- memcpy(vaos->machine->constant,
- constants,
- (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));
-
-#if 0
- unsigned i;
- for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
- debug_printf("state %d: %f %f %f %f\n",
- i,
- constants[i][0],
- constants[i][1],
- constants[i][2],
- constants[i][3]);
-#endif
-
- {
- unsigned i;
- for (i = 0; i < MAX_LIT_INFO; i++) {
- vaos->machine->lit_info[i].func = populate_lut;
- vaos->machine->now++;
- }
- }
-}
-
+ FREE( vaos->attrib );
-static void vaos_set_viewport( struct draw_vs_varient *varient,
- const struct pipe_viewport_state *viewport )
-{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ x86_release_func( &vaos->func[0] );
+ x86_release_func( &vaos->func[1] );
- memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float));
- memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
+ FREE(vaos);
}
@@ -2200,19 +2059,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.key = *key;
vaos->base.vs = vs;
vaos->base.set_input = vaos_set_buffer;
- vaos->base.set_constants = vaos_set_constants;
- vaos->base.set_viewport = vaos_set_viewport;
vaos->base.destroy = vaos_destroy;
vaos->base.run_linear = vaos_run_linear;
vaos->base.run_elts = vaos_run_elts;
vaos->draw = vs->draw;
- vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
- if (!vaos->machine)
+
+ vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
+ if (!vaos->attrib)
goto fail;
-
- memset(vaos->machine, 0, sizeof(struct aos_machine));
- init_internals(vaos->machine);
tgsi_dump(vs->state.tokens, 0);
@@ -2222,19 +2077,19 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!build_vertex_program( vaos, FALSE ))
goto fail;
- vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]);
+ vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]);
if (!vaos->gen_run_linear)
goto fail;
- vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]);
+ vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]);
if (!vaos->gen_run_elts)
goto fail;
return &vaos->base;
fail:
- if (vaos->machine)
- align_free( vaos->machine );
+ if (vaos && vaos->attrib)
+ FREE(vaos->attrib);
if (vaos)
x86_release_func( &vaos->func[0] );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index b47413ff43c..fb6d43d32e2 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -60,10 +60,16 @@ struct x86_function;
#define FPU_RND_NEAREST 2
struct aos_machine;
-typedef void PIPE_CDECL (*lit_func)( struct aos_machine *,
+typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
float *result,
const float *in,
unsigned count );
+
+void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count );
+
struct shine_tab {
float exponent;
float values[258];
@@ -78,6 +84,14 @@ struct lit_info {
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16
+struct aos_attrib {
+ const void *input_ptr;
+ unsigned input_stride;
+};
+
+
+
+
/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
@@ -86,8 +100,6 @@ struct aos_machine {
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
- float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
- float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];
float scale[4]; /* viewport */
@@ -105,12 +117,10 @@ struct aos_machine {
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */
- struct {
- const void *input_ptr;
- unsigned input_stride;
+ const float (*immediates)[4]; /* points to shader data */
+ const float (*constants)[4]; /* points to draw data */
- unsigned output_offset;
- } attrib[PIPE_MAX_ATTRIBS];
+ const struct aos_attrib *attrib; /* points to ? */
};
@@ -132,6 +142,7 @@ struct aos_compilation {
unsigned last_used;
} xmm[8];
+ unsigned ebp; /* one of X86_* */
boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
@@ -148,6 +159,8 @@ struct aos_compilation {
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg temp_EBP;
+ struct x86_reg stack_ESP;
};
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
@@ -192,25 +205,34 @@ do { \
} while (0)
+#define X86_NULL 0
+#define X86_IMMEDIATES 1
+#define X86_CONSTANTS 2
+#define X86_ATTRIBS 3
+
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned value );
+typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
-#if 0
- struct {
- const void *ptr;
- unsigned stride;
- } attrib[PIPE_MAX_ATTRIBS];
-#endif
-
- struct aos_machine *machine; /* XXX: temporarily unshared */
+ struct aos_attrib *attrib;
- vsv_run_linear_func gen_run_linear;
- vsv_run_elts_func gen_run_elts;
+ vaos_run_linear_func gen_run_linear;
+ vaos_run_elts_func gen_run_elts;
struct x86_function func[2];
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 836110f382d..45e20922092 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
-static void get_src_ptr( struct x86_function *func,
+static void get_src_ptr( struct aos_compilation *cp,
struct x86_reg src,
- struct x86_reg machine,
struct x86_reg elt,
unsigned a )
{
- struct x86_reg input_ptr =
- x86_make_disp(machine,
- Offset(struct aos_machine, attrib[a].input_ptr));
+ struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ),
+ a * sizeof(struct aos_attrib));
- struct x86_reg input_stride =
- x86_make_disp(machine,
- Offset(struct aos_machine, attrib[a].input_stride));
+ struct x86_reg input_ptr = x86_make_disp(attrib,
+ Offset(struct aos_attrib, input_ptr));
+
+ struct x86_reg input_stride = x86_make_disp(attrib,
+ Offset(struct aos_attrib, input_stride));
/* Calculate pointer to current attrib:
*/
- x86_mov(func, src, input_stride);
- x86_imul(func, src, elt);
- x86_add(func, src, input_ptr);
+ x86_mov(cp->func, src, input_stride);
+ x86_imul(cp->func, src, elt);
+ x86_add(cp->func, src, input_ptr);
}
@@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp,
/* Figure out source pointer address:
*/
- get_src_ptr(cp->func,
+ get_src_ptr(cp,
src,
- cp->machine_EDX,
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
idx);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
new file mode 100644
index 00000000000..b7864e9f2f0
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
@@ -0,0 +1,297 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+
+void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 1.0;
+ result[3] = 1.0F;
+ }
+ else
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F;
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 1.0;
+ result[3] = 1.0F;
+ return;
+ }
+
+ if (machine->lit_info[count].shine_tab->exponent != in[3]) {
+ machine->lit_info[count].func = aos_do_lit;
+ goto no_luck;
+ }
+
+ if (in[1] <= 1.0)
+ {
+ const float *tab = machine->lit_info[count].shine_tab->values;
+ float f = in[1] * 256;
+ int k = (int)f;
+ float frac = f - (float)k;
+
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
+ result[3] = 1.0;
+ return;
+ }
+
+ no_luck:
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F;
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static void do_populate_lut( struct shine_tab *tab,
+ float unclamped_exponent )
+{
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
+ unsigned i;
+
+ tab->exponent = unclamped_exponent; /* for later comparison */
+
+ tab->values[0] = 0;
+ if (exponent == 0) {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = 1.0;
+ }
+ }
+ else {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = powf((float)i * epsilon, exponent);
+ }
+ }
+}
+
+
+
+
+static void PIPE_CDECL populate_lut( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ unsigned i, tab;
+
+ /* Search for an existing table for this value. Note that without
+ * static analysis we don't really know if in[3] will be constant,
+ * but it usually is...
+ */
+ for (tab = 0; tab < 4; tab++) {
+ if (machine->shine_tab[tab].exponent == in[3]) {
+ goto found;
+ }
+ }
+
+ for (tab = 0, i = 1; i < 4; i++) {
+ if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
+ tab = i;
+ }
+
+ if (machine->shine_tab[tab].last_used == machine->now) {
+ /* No unused tables (this is not a ffvertex program...). Just
+ * call pow each time:
+ */
+ machine->lit_info[count].func = aos_do_lit;
+ machine->lit_info[count].func( machine, result, in, count );
+ return;
+ }
+ else {
+ do_populate_lut( &machine->shine_tab[tab], in[3] );
+ }
+
+ found:
+ machine->shine_tab[tab].last_used = machine->now;
+ machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
+ machine->lit_info[count].func = do_lit_lut;
+ machine->lit_info[count].func( machine, result, in, count );
+}
+
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] )
+{
+ machine->constants = constants;
+
+ {
+ unsigned i;
+ for (i = 0; i < MAX_LIT_INFO; i++) {
+ machine->lit_info[i].func = populate_lut;
+ machine->now++;
+ }
+ }
+}
+
+
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport )
+{
+ memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
+ memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
+}
+
+
+
+void draw_vs_aos_machine_destroy( struct aos_machine *machine )
+{
+ align_free(machine);
+}
+
+struct aos_machine *draw_vs_aos_machine( void )
+{
+ struct aos_machine *machine;
+ unsigned i;
+ float inv = 1.0f/255.0f;
+ float f255 = 255.0f;
+
+ machine = align_malloc(sizeof(struct aos_machine), 16);
+ if (!machine)
+ return NULL;
+
+ memset(machine, 0, sizeof(*machine));
+
+ ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
+ *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
+
+ ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
+ ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
+ ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
+ ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
+ ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
+ ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
+
+
+ machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
+ X87_CW_EXCEPTION_DENORM_OP |
+ X87_CW_EXCEPTION_ZERO_DIVIDE |
+ X87_CW_EXCEPTION_OVERFLOW |
+ X87_CW_EXCEPTION_UNDERFLOW |
+ X87_CW_EXCEPTION_PRECISION |
+ (1<<6) |
+ X87_CW_ROUND_NEAREST |
+ X87_CW_PRECISION_DOUBLE_EXT);
+
+ assert(machine->fpu_rnd_nearest == 0x37f);
+
+ machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
+ X87_CW_EXCEPTION_DENORM_OP |
+ X87_CW_EXCEPTION_ZERO_DIVIDE |
+ X87_CW_EXCEPTION_OVERFLOW |
+ X87_CW_EXCEPTION_UNDERFLOW |
+ X87_CW_EXCEPTION_PRECISION |
+ (1<<6) |
+ X87_CW_ROUND_DOWN |
+ X87_CW_PRECISION_DOUBLE_EXT);
+
+ for (i = 0; i < MAX_SHINE_TAB; i++)
+ do_populate_lut( &machine->shine_tab[i], 1.0f );
+
+ return machine;
+}
+
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 2dc9352c6f9..37cbe9bf9e3 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -70,8 +70,6 @@ struct draw_sse_vertex_shader {
codegen_function func;
struct tgsi_exec_machine *machine;
-
- float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
@@ -109,7 +107,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Outputs,
(float (*)[4])constants,
machine->Temps,
- shader->immediates,
+ (float (*)[4])shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@@ -132,6 +130,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
x86_release_func( &shader->sse2_program );
+ align_free(shader->base.immediates);
+
FREE( (void*) shader->base.state.tokens );
FREE( shader );
}
@@ -163,12 +163,18 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program, vs->immediates, TRUE ))
+ &vs->sse2_program,
+ (float (*)[4])vs->base.immediates,
+ TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 119a3a04b56..4a155251b53 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -44,8 +44,6 @@
struct draw_vs_varient_generic {
struct draw_vs_varient base;
- struct pipe_viewport_state viewport;
-
struct draw_vertex_shader *shader;
struct draw_context *draw;
@@ -57,21 +55,11 @@ struct draw_vs_varient_generic {
*/
struct translate *fetch;
struct translate *emit;
-
- const float (*constants)[4];
};
-static void vsvg_set_constants( struct draw_vs_varient *varient,
- const float (*constants)[4] )
-{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
-
- vsvg->constants = constants;
-}
-
static void vsvg_set_input( struct draw_vs_varient *varient,
unsigned buffer,
@@ -94,8 +82,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
void *output_buffer )
{
char *ptr = (char *)output_buffer;
- const float *scale = vsvg->viewport.scale;
- const float *trans = vsvg->viewport.translate;
+ const float *scale = vsvg->base.vs->draw->viewport.scale;
+ const float *trans = vsvg->base.vs->draw->viewport.translate;
unsigned stride = vsvg->base.key.output_stride;
unsigned j;
@@ -115,8 +103,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
void *output_buffer )
{
char *ptr = (char *)output_buffer;
- const float *scale = vsvg->viewport.scale;
- const float *trans = vsvg->viewport.translate;
+ const float *scale = vsvg->base.vs->draw->viewport.scale;
+ const float *trans = vsvg->base.vs->draw->viewport.translate;
unsigned stride = vsvg->base.key.output_stride;
unsigned j;
@@ -130,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
}
-static void vsvg_run_elts( struct draw_vs_varient *varient,
- const unsigned *elts,
- unsigned count,
- void *output_buffer)
+static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer)
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@@ -150,7 +138,7 @@ static void vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
- vsvg->constants,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
@@ -186,10 +174,10 @@ static void vsvg_run_elts( struct draw_vs_varient *varient,
}
-static void vsvg_run_linear( struct draw_vs_varient *varient,
- unsigned start,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@@ -206,7 +194,7 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
- vsvg->constants,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
@@ -245,13 +233,6 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
-static void vsvg_set_viewport( struct draw_vs_varient *varient,
- const struct pipe_viewport_state *viewport )
-{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
-
- vsvg->viewport = *viewport;
-}
static void vsvg_destroy( struct draw_vs_varient *varient )
{
@@ -272,8 +253,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
vsvg->base.key = *key;
vsvg->base.vs = vs;
vsvg->base.set_input = vsvg_set_input;
- vsvg->base.set_constants = vsvg_set_constants;
- vsvg->base.set_viewport = vsvg_set_viewport;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;