summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/draw/Makefile 1
-rw-r--r-- src/gallium/auxiliary/draw/SConscript 1
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.c 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_private.h 9
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c 6
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.c 43
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.h 24
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 282
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.h 25
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_machine.c 297
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_varient.c 49
11 files changed, 412 insertions, 328 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 9a88ecc0708..f2e36a89e90 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -37,6 +37,7 @@ C_SOURCES = \
draw_vs_varient.c \
draw_vs_aos.c \
draw_vs_aos_io.c \
+ draw_vs_aos_machine.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 26919a22982..925e668f222 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -36,6 +36,7 @@ draw = env.ConvenienceLibrary(
'draw_vs.c',
'draw_vs_aos.c',
'draw_vs_aos_io.c',
+ 'draw_vs_aos_machine.c',
'draw_vs_exec.c',
'draw_vs_llvm.c',
'draw_vs_sse.c',
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 22420749656..8509baf8654 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -174,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw,
viewport->translate[1] == 0.0f &&
viewport->translate[2] == 0.0f &&
viewport->translate[3] == 0.0f);
+
+ draw_vs_set_viewport( draw, viewport );
}
@@ -218,6 +220,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer)
{
draw->pt.user.constants = buffer;
+ draw_vs_set_constants( draw, (const float (*)[4])buffer );
}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index c095bf3d7b9..4cbccc8b5bb 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -183,6 +183,10 @@ struct draw_context
*/
struct gallivm_cpu_engine *engine;
+ /* Here's another one:
+ */
+ struct aos_machine *aos_machine;
+
struct translate *fetch;
struct translate_cache *fetch_cache;
@@ -215,6 +219,11 @@ struct draw_context
boolean draw_vs_init( struct draw_context *draw );
void draw_vs_destroy( struct draw_context *draw );
+void draw_vs_set_viewport( struct draw_context *,
+ const struct pipe_viewport_state * );
+
+void draw_vs_set_constants( struct draw_context *,
+ const float (*constants)[4] );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 729c7db9999..5265a131605 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -189,12 +189,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
draw->pt.vertex_buffer[buf].pitch );
}
- fse->active->set_constants( fse->active,
- (const float (*)[4])draw->pt.user.constants );
-
- fse->active->set_viewport( fse->active,
- &draw->viewport );
-
//return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 9b899d404e1..a8b6d0c90d2 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -41,6 +41,22 @@
+
+void draw_vs_set_constants( struct draw_context *draw, /* Hand a new constant buffer to the context's shared aos machine. */
+ const float (*constants)[4] )
+{
+ draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); /* draw->vs.aos_machine is passed through unchecked */
+}
+
+
+void draw_vs_set_viewport( struct draw_context *draw, /* Hand a new viewport to the context's shared aos machine. */
+ const struct pipe_viewport_state *viewport )
+{
+ draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport ); /* draw->vs.aos_machine is passed through unchecked */
+}
+
+
+
struct draw_vertex_shader *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
@@ -83,6 +99,13 @@ void
draw_delete_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
+ unsigned i;
+
+ for (i = 0; i < dvs->nr_varients; i++)
+ dvs->varient[i]->destroy( dvs->varient[i] );
+
+ dvs->nr_varients = 0;
+
dvs->delete( dvs );
}
@@ -110,6 +133,10 @@ draw_vs_init( struct draw_context *draw )
draw->vs.fetch_cache = translate_cache_create();
if (!draw->vs.fetch_cache)
return FALSE;
+
+ draw->vs.aos_machine = draw_vs_aos_machine();
+ if (!draw->vs.aos_machine)
+ return FALSE;
return TRUE;
}
@@ -129,6 +156,9 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.emit_cache)
translate_cache_destroy(draw->vs.emit_cache);
+ if (draw->vs.aos_machine)
+ draw_vs_aos_machine_destroy(draw->vs.aos_machine);
+
tgsi_exec_machine_free_data(&draw->vs.machine);
}
@@ -153,10 +183,17 @@ draw_vs_lookup_varient( struct draw_vertex_shader *vs,
if (varient == NULL)
return NULL;
- /* Add it to our list:
+ /* Add it to our list, could be smarter:
*/
- assert(vs->nr_varients < Elements(vs->varient));
- vs->varient[vs->nr_varients++] = varient;
+ if (vs->nr_varients < Elements(vs->varient)) {
+ vs->varient[vs->nr_varients++] = varient;
+ }
+ else {
+ vs->last_varient++;
+ vs->last_varient %= Elements(vs->varient);
+ vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]);
+ vs->varient[vs->last_varient] = varient;
+ }
/* Done
*/
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 7aa0415baf0..08c6de8ba86 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -70,16 +70,6 @@ struct draw_vs_varient_key {
struct draw_vs_varient;
-typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *,
- const unsigned *elts,
- unsigned count,
- void *output_buffer);
-
-typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *,
- unsigned start,
- unsigned count,
- void *output_buffer);
-
struct draw_vs_varient {
struct draw_vs_varient_key key;
@@ -91,12 +81,6 @@ struct draw_vs_varient {
const void *ptr,
unsigned stride );
- void (*set_constants)( struct draw_vs_varient *,
- const float (*constants)[4] );
-
- void (*set_viewport)( struct draw_vs_varient *,
- const struct pipe_viewport_state * );
-
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
unsigned count,
@@ -131,6 +115,7 @@ struct draw_vertex_shader {
*/
struct draw_vs_varient *varient[16];
unsigned nr_varients;
+ unsigned last_varient;
struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
const struct draw_vs_varient_key *key );
@@ -217,7 +202,14 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key
}
+struct aos_machine *draw_vs_aos_machine( void );
+void draw_vs_aos_machine_destroy( struct aos_machine *machine );
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+ const float (*constants)[4] );
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+ const struct pipe_viewport_state *viewport );
#define MAX_TGSI_VERTICES 4
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 9056785e7a6..b5e4e1e7b1a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -149,70 +149,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
#define X87_CW_ROUND_MASK (3<<10)
#define X87_CW_INFINITY (1<<12)
-static void do_populate_lut( struct shine_tab *tab,
- float unclamped_exponent )
-{
- const float epsilon = 1.0F / 256.0F;
- float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
- unsigned i;
- tab->exponent = unclamped_exponent; /* for later comparison */
-
- tab->values[0] = 0;
- if (exponent == 0) {
- for (i = 1; i < 258; i++) {
- tab->values[i] = 1.0;
- }
- }
- else {
- for (i = 1; i < 258; i++) {
- tab->values[i] = powf((float)i * epsilon, exponent);
- }
- }
-}
-
-static void init_internals( struct aos_machine *machine )
-{
- unsigned i;
- float inv = 1.0f/255.0f;
- float f255 = 255.0f;
-
- ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
- *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
-
- ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
- ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
- ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
- ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
- ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
- ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
-
-
- machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
- X87_CW_EXCEPTION_DENORM_OP |
- X87_CW_EXCEPTION_ZERO_DIVIDE |
- X87_CW_EXCEPTION_OVERFLOW |
- X87_CW_EXCEPTION_UNDERFLOW |
- X87_CW_EXCEPTION_PRECISION |
- (1<<6) |
- X87_CW_ROUND_NEAREST |
- X87_CW_PRECISION_DOUBLE_EXT);
-
- assert(machine->fpu_rnd_nearest == 0x37f);
-
- machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
- X87_CW_EXCEPTION_DENORM_OP |
- X87_CW_EXCEPTION_ZERO_DIVIDE |
- X87_CW_EXCEPTION_OVERFLOW |
- X87_CW_EXCEPTION_UNDERFLOW |
- X87_CW_EXCEPTION_PRECISION |
- (1<<6) |
- X87_CW_ROUND_DOWN |
- X87_CW_PRECISION_DOUBLE_EXT);
-
- for (i = 0; i < MAX_SHINE_TAB; i++)
- do_populate_lut( &machine->shine_tab[i], 1.0f );
-}
static void spill( struct aos_compilation *cp, unsigned idx )
@@ -1220,136 +1157,6 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-static void PIPE_CDECL do_lit( struct aos_machine *machine,
- float *result,
- const float *in,
- unsigned count )
-{
- if (in[0] > 0)
- {
- if (in[1] <= 0.0)
- {
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = 1.0;
- result[3] = 1.0F;
- }
- else
- {
- const float epsilon = 1.0F / 256.0F;
- float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = powf(in[1], exponent);
- result[3] = 1.0;
- }
- }
- else
- {
- result[0] = 1.0F;
- result[1] = 0.0;
- result[2] = 0.0;
- result[3] = 1.0F;
- }
-}
-
-
-static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
- float *result,
- const float *in,
- unsigned count )
-{
- if (in[0] > 0)
- {
- if (in[1] <= 0.0)
- {
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = 1.0;
- result[3] = 1.0F;
- return;
- }
-
- if (machine->lit_info[count].shine_tab->exponent != in[3]) {
- machine->lit_info[count].func = do_lit;
- goto no_luck;
- }
-
- if (in[1] <= 1.0)
- {
- const float *tab = machine->lit_info[count].shine_tab->values;
- float f = in[1] * 256;
- int k = (int)f;
- float frac = f - (float)k;
-
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
- result[3] = 1.0;
- return;
- }
-
- no_luck:
- {
- const float epsilon = 1.0F / 256.0F;
- float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
- result[0] = 1.0F;
- result[1] = in[0];
- result[2] = powf(in[1], exponent);
- result[3] = 1.0;
- }
- }
- else
- {
- result[0] = 1.0F;
- result[1] = 0.0;
- result[2] = 0.0;
- result[3] = 1.0F;
- }
-}
-
-
-
-static void PIPE_CDECL populate_lut( struct aos_machine *machine,
- float *result,
- const float *in,
- unsigned count )
-{
- unsigned i, tab;
-
- /* Search for an existing table for this value. Note that without
- * static analysis we don't really know if in[3] will be constant,
- * but it usually is...
- */
- for (tab = 0; tab < 4; tab++) {
- if (machine->shine_tab[tab].exponent == in[3]) {
- goto found;
- }
- }
-
- for (tab = 0, i = 1; i < 4; i++) {
- if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
- tab = i;
- }
-
- if (machine->shine_tab[tab].last_used == machine->now) {
- /* No unused tables (this is not a ffvertex program...). Just
- * call pow each time:
- */
- machine->lit_info[count].func = do_lit;
- machine->lit_info[count].func( machine, result, in, count );
- return;
- }
- else {
- do_populate_lut( &machine->shine_tab[tab], in[3] );
- }
-
- found:
- machine->shine_tab[tab].last_used = machine->now;
- machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
- machine->lit_info[count].func = do_lit_lut;
- machine->lit_info[count].func( machine, result, in, count );
-}
@@ -1413,7 +1220,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
Offset(struct lit_info, func)));
}
else {
- x86_mov_reg_imm( cp->func, ecx, (int)do_lit );
+ x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit );
}
x86_call( cp->func, ecx );
@@ -1434,7 +1241,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-
+#if 0
static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
@@ -1495,6 +1302,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
return TRUE;
}
+#endif
@@ -1945,7 +1753,7 @@ static void find_last_write_outputs( struct aos_compilation *cp )
}
-#define ARG_VARIENT 1
+#define ARG_MACHINE 1
#define ARG_START_ELTS 2
#define ARG_COUNT 3
#define ARG_OUTBUF 4
@@ -1985,7 +1793,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
/* Load arguments into regs:
*/
- x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT));
+ x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE));
x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
@@ -1997,11 +1805,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
fixup = x86_jcc_forward(cp.func, cc_E);
- /* Dig out the machine pointer from inside the varient arg
- */
- x86_mov(cp.func, cp.machine_EDX,
- x86_make_disp(cp.machine_EDX,
- Offset( struct draw_vs_varient_aos_sse, machine )));
save_fpu_state( &cp );
set_fpu_round_nearest( &cp );
@@ -2151,13 +1954,14 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct aos_machine *machine = vaos->draw->vs.aos_machine;
- vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
- vaos->machine->constants = vaos->draw->pt.user.constants;
- vaos->machine->immediates = vaos->base.vs->immediates;
- vaos->machine->attrib = vaos->attrib;
+ machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ machine->constants = (const float (*)[4])vaos->draw->pt.user.constants;
+ machine->immediates = vaos->base.vs->immediates;
+ machine->attrib = vaos->attrib;
- vaos->gen_run_elts( varient,
+ vaos->gen_run_elts( machine,
elts,
count,
output_buffer );
@@ -2169,61 +1973,25 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
void *output_buffer )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+ struct aos_machine *machine = vaos->draw->vs.aos_machine;
- vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
- vaos->machine->constants = vaos->draw->pt.user.constants;
- vaos->machine->immediates = vaos->base.vs->immediates;
- vaos->machine->attrib = vaos->attrib;
+ machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ machine->constants = (const float (*)[4])vaos->draw->pt.user.constants;
+ machine->immediates = vaos->base.vs->immediates;
+ machine->attrib = vaos->attrib;
- vaos->gen_run_linear( varient,
+ vaos->gen_run_linear( machine,
start,
count,
output_buffer );
}
-static void vaos_set_constants( struct draw_vs_varient *varient,
- const float (*constants)[4] )
-{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
-#if 0
- unsigned i;
- for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
- debug_printf("state %d: %f %f %f %f\n",
- i,
- constants[i][0],
- constants[i][1],
- constants[i][2],
- constants[i][3]);
-#endif
-
- {
- unsigned i;
- for (i = 0; i < MAX_LIT_INFO; i++) {
- vaos->machine->lit_info[i].func = populate_lut;
- vaos->machine->now++;
- }
- }
-}
-
-
-static void vaos_set_viewport( struct draw_vs_varient *varient,
- const struct pipe_viewport_state *viewport )
-{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
- memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float));
- memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
-}
static void vaos_destroy( struct draw_vs_varient *varient )
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
- if (vaos->machine)
- align_free( vaos->machine );
-
FREE( vaos->attrib );
x86_release_func( &vaos->func[0] );
@@ -2245,8 +2013,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.key = *key;
vaos->base.vs = vs;
vaos->base.set_input = vaos_set_buffer;
- vaos->base.set_constants = vaos_set_constants;
- vaos->base.set_viewport = vaos_set_viewport;
vaos->base.destroy = vaos_destroy;
vaos->base.run_linear = vaos_run_linear;
vaos->base.run_elts = vaos_run_elts;
@@ -2257,13 +2023,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!vaos->attrib)
goto fail;
- vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
- if (!vaos->machine)
- goto fail;
-
- memset(vaos->machine, 0, sizeof(struct aos_machine));
- init_internals(vaos->machine);
-
tgsi_dump(vs->state.tokens, 0);
if (!build_vertex_program( vaos, TRUE ))
@@ -2272,11 +2031,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (!build_vertex_program( vaos, FALSE ))
goto fail;
- vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]);
+ vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]);
if (!vaos->gen_run_linear)
goto fail;
- vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]);
+ vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]);
if (!vaos->gen_run_elts)
goto fail;
@@ -2286,9 +2045,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
if (vaos && vaos->attrib)
FREE(vaos->attrib);
- if (vaos && vaos->machine)
- align_free( vaos->machine );
-
if (vaos)
x86_release_func( &vaos->func[0] );
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 295d2cb3fe5..89a9174151d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -60,10 +60,16 @@ struct x86_function;
#define FPU_RND_NEAREST 2
struct aos_machine;
-typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
+typedef void PIPE_CDECL (*lit_func)( struct aos_machine *,
float *result,
const float *in,
unsigned count );
+
+PIPE_CDECL void aos_do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count );
+
struct shine_tab {
float exponent;
float values[258];
@@ -207,16 +213,25 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp,
unsigned value );
+typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
+
+
struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
struct aos_attrib *attrib;
- struct aos_machine *machine; /* XXX: temporarily unshared */
-
- vsv_run_linear_func gen_run_linear;
- vsv_run_elts_func gen_run_elts;
+ vaos_run_linear_func gen_run_linear;
+ vaos_run_elts_func gen_run_elts;
struct x86_function func[2];
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
new file mode 100644
index 00000000000..53e999b191e
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
@@ -0,0 +1,297 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+
+PIPE_CDECL void aos_do_lit( struct aos_machine *machine, /* LIT helper that always calls powf(); 'machine' is not read here. */
+ float *result, /* out: four floats are written */
+ const float *in, /* in: elements [0], [1] and [3] are read */
+ unsigned count ) /* not read here; present so the signature matches lit_func */
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 1.0; /* NOTE(review): the in[0] <= 0 branch below writes 0.0 here; confirm 1.0 is intended when in[1] <= 0 */
+ result[3] = 1.0F;
+ }
+ else
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); /* bounds are +/-(128 - 1/256) */
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F; /* in[0] <= 0: constant result (1, 0, 0, 1) */
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, /* LIT helper that interpolates in the table bound to lit_info[count]. */
+ float *result, /* out: four floats are written */
+ const float *in, /* in: elements [0], [1] and [3] are read */
+ unsigned count ) /* index into machine->lit_info[] */
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 1.0;
+ result[3] = 1.0F;
+ return;
+ }
+
+ if (machine->lit_info[count].shine_tab->exponent != in[3]) { /* table was built for a different exponent */
+ machine->lit_info[count].func = aos_do_lit; /* this slot stops using the table from now on */
+ goto no_luck;
+ }
+
+ if (in[1] <= 1.0)
+ {
+ const float *tab = machine->lit_info[count].shine_tab->values;
+ float f = in[1] * 256; /* table position; entries are 1/256 apart */
+ int k = (int)f; /* at most 256 here, so tab[k+1] stays within values[258] */
+ float frac = f - (float)k;
+
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = tab[k] + frac*(tab[k+1]-tab[k]); /* linear interpolation between neighbouring entries */
+ result[3] = 1.0;
+ return;
+ }
+
+ no_luck: /* reached on exponent mismatch or when in[1] > 1.0 */
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F; /* in[0] <= 0: constant result (1, 0, 0, 1) */
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static void do_populate_lut( struct shine_tab *tab, /* Fill tab->values[] for the given exponent. */
+ float unclamped_exponent )
+{
+ const float epsilon = 1.0F / 256.0F; /* spacing between table entries */
+ float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
+ unsigned i;
+
+ tab->exponent = unclamped_exponent; /* for later comparison */
+
+ tab->values[0] = 0; /* entry 0 is always 0, even when exponent == 0 */
+ if (exponent == 0) {
+ for (i = 1; i < 258; i++) { /* 258 matches the declared size of values[] */
+ tab->values[i] = 1.0;
+ }
+ }
+ else {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = powf((float)i * epsilon, exponent); /* entry i holds pow(i/256, exponent) */
+ }
+ }
+}
+
+
+
+
+static void PIPE_CDECL populate_lut( struct aos_machine *machine, /* Pick or build a table for in[3], rebind lit_info[count].func, then run it. */
+ float *result,
+ const float *in,
+ unsigned count ) /* index into machine->lit_info[] */
+{
+ unsigned i, tab;
+
+ /* Search for an existing table for this value. Note that without
+ * static analysis we don't really know if in[3] will be constant,
+ * but it usually is...
+ */
+ for (tab = 0; tab < 4; tab++) { /* NOTE(review): literal 4 here, while tables are initialised up to MAX_SHINE_TAB; confirm they agree */
+ if (machine->shine_tab[tab].exponent == in[3]) {
+ goto found;
+ }
+ }
+
+ for (tab = 0, i = 1; i < 4; i++) { /* choose the table with the smallest last_used stamp */
+ if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
+ tab = i;
+ }
+
+ if (machine->shine_tab[tab].last_used == machine->now) { /* even the oldest table carries the current stamp */
+ /* No unused tables (this is not a ffvertex program...). Just
+ * call pow each time:
+ */
+ machine->lit_info[count].func = aos_do_lit;
+ machine->lit_info[count].func( machine, result, in, count );
+ return;
+ }
+ else {
+ do_populate_lut( &machine->shine_tab[tab], in[3] ); /* overwrite the chosen table for this exponent */
+ }
+
+ found:
+ machine->shine_tab[tab].last_used = machine->now; /* stamp the table with the current 'now' */
+ machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
+ machine->lit_info[count].func = do_lit_lut; /* later calls for this slot go straight to the table path */
+ machine->lit_info[count].func( machine, result, in, count );
+}
+
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine, /* Store the constants pointer and reset every LIT slot. */
+ const float (*constants)[4] )
+{
+ machine->constants = constants; /* the pointer is stored; the data is not copied */
+
+ {
+ unsigned i;
+ for (i = 0; i < MAX_LIT_INFO; i++) {
+ machine->lit_info[i].func = populate_lut; /* the next LIT call on this slot selects a table again */
+ machine->now++; /* NOTE(review): bumped once per slot, i.e. MAX_LIT_INFO times per call; confirm a single bump was not intended */
+ }
+ }
+}
+
+
+void draw_vs_aos_machine_viewport( struct aos_machine *machine, /* Copy the viewport scale and translate into the machine. */
+ const struct pipe_viewport_state *viewport )
+{
+ memcpy(machine->scale, viewport->scale, 4 * sizeof(float)); /* four floats copied by value */
+ memcpy(machine->translate, viewport->translate, 4 * sizeof(float)); /* four floats copied by value */
+}
+
+
+
+void draw_vs_aos_machine_destroy( struct aos_machine *machine ) /* Release a machine returned by draw_vs_aos_machine(). */
+{
+ align_free(machine); /* pairs with the align_malloc() in draw_vs_aos_machine() */
+}
+
+struct aos_machine *draw_vs_aos_machine( void ) /* Allocate and initialise an aos_machine; returns NULL if allocation fails. */
+{
+ struct aos_machine *machine;
+ unsigned i;
+ float inv = 1.0f/255.0f;
+ float f255 = 255.0f;
+
+ machine = align_malloc(sizeof(struct aos_machine), 16); /* 16 is passed as the alignment argument */
+ if (!machine)
+ return NULL;
+
+ memset(machine, 0, sizeof(*machine)); /* every field not set below starts as zero bytes */
+
+ ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
+ *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; /* replaces the 4th component with an all-ones bit pattern; NOTE(review): store through a pointer cast, check aliasing rules */
+
+ ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
+ ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
+ ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
+ ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
+ ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
+ ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
+
+
+ machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | /* x87 control word with X87_CW_ROUND_NEAREST */
+ X87_CW_EXCEPTION_DENORM_OP |
+ X87_CW_EXCEPTION_ZERO_DIVIDE |
+ X87_CW_EXCEPTION_OVERFLOW |
+ X87_CW_EXCEPTION_UNDERFLOW |
+ X87_CW_EXCEPTION_PRECISION |
+ (1<<6) |
+ X87_CW_ROUND_NEAREST |
+ X87_CW_PRECISION_DOUBLE_EXT);
+
+ assert(machine->fpu_rnd_nearest == 0x37f); /* sanity check on the bit composition above */
+
+ machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | /* same bits, but with X87_CW_ROUND_DOWN */
+ X87_CW_EXCEPTION_DENORM_OP |
+ X87_CW_EXCEPTION_ZERO_DIVIDE |
+ X87_CW_EXCEPTION_OVERFLOW |
+ X87_CW_EXCEPTION_UNDERFLOW |
+ X87_CW_EXCEPTION_PRECISION |
+ (1<<6) |
+ X87_CW_ROUND_DOWN |
+ X87_CW_PRECISION_DOUBLE_EXT);
+
+ for (i = 0; i < MAX_SHINE_TAB; i++)
+ do_populate_lut( &machine->shine_tab[i], 1.0f ); /* every table starts out built for exponent 1.0 */
+
+ return machine;
+}
+
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 784ae41205f..18cb06e3742 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -44,8 +44,6 @@
struct draw_vs_varient_generic {
struct draw_vs_varient base;
- struct pipe_viewport_state viewport;
-
struct draw_vertex_shader *shader;
struct draw_context *draw;
@@ -57,21 +55,11 @@ struct draw_vs_varient_generic {
*/
struct translate *fetch;
struct translate *emit;
-
- const float (*constants)[4];
};
-static void vsvg_set_constants( struct draw_vs_varient *varient,
- const float (*constants)[4] )
-{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
-
- vsvg->constants = constants;
-}
-
static void vsvg_set_input( struct draw_vs_varient *varient,
unsigned buffer,
@@ -94,8 +82,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
void *output_buffer )
{
char *ptr = (char *)output_buffer;
- const float *scale = vsvg->viewport.scale;
- const float *trans = vsvg->viewport.translate;
+ const float *scale = vsvg->base.vs->draw->viewport.scale;
+ const float *trans = vsvg->base.vs->draw->viewport.translate;
unsigned stride = vsvg->base.key.output_stride;
unsigned j;
@@ -115,8 +103,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
void *output_buffer )
{
char *ptr = (char *)output_buffer;
- const float *scale = vsvg->viewport.scale;
- const float *trans = vsvg->viewport.translate;
+ const float *scale = vsvg->base.vs->draw->viewport.scale;
+ const float *trans = vsvg->base.vs->draw->viewport.translate;
unsigned stride = vsvg->base.key.output_stride;
unsigned j;
@@ -130,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
}
-static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
- const unsigned *elts,
- unsigned count,
- void *output_buffer )
+static void vsvg_run_elts( struct draw_vs_varient *varient,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer)
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@@ -150,7 +138,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
- vsvg->constants,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
@@ -186,10 +174,10 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
}
-static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
- unsigned start,
- unsigned count,
- void *output_buffer )
+static void vsvg_run_linear( struct draw_vs_varient *varient,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
@@ -206,7 +194,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
- vsvg->constants,
+ (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
@@ -245,13 +233,6 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
-static void vsvg_set_viewport( struct draw_vs_varient *varient,
- const struct pipe_viewport_state *viewport )
-{
- struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
-
- vsvg->viewport = *viewport;
-}
static void vsvg_destroy( struct draw_vs_varient *varient )
{
@@ -272,8 +253,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
vsvg->base.key = *key;
vsvg->base.vs = vs;
vsvg->base.set_input = vsvg_set_input;
- vsvg->base.set_constants = vsvg_set_constants;
- vsvg->base.set_viewport = vsvg_set_viewport;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;