summary refs log tree commit diff stats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
author Ben Skeggs <[email protected]> 2008-04-23 12:39:38 +1000
committer Ben Skeggs <[email protected]> 2008-04-23 12:39:38 +1000
commit 104ff59585ad1888c8cef5ad9de0e2fdb3f48c21 (patch)
tree 9128984eef4a90cc6177d336759ce795b835d71f /src/gallium/auxiliary
parent b20acef90695d6e5975f538b6e9cb812b05f0cf6 (diff)
parent 6fc530ccda2971a5d99a955ad90ae9762238040f (diff)
Merge branch 'upstream-gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c4
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c250
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h56
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.c23
-rw-r--r--src/gallium/auxiliary/draw/Makefile56
-rw-r--r--src/gallium/auxiliary/draw/SConscript54
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c233
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h55
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c (renamed from src/gallium/auxiliary/draw/draw_pt_pipeline.c)143
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.h114
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c (renamed from src/gallium/auxiliary/draw/draw_aaline.c)108
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c (renamed from src/gallium/auxiliary/draw/draw_aapoint.c)75
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c (renamed from src/gallium/auxiliary/draw/draw_clip.c)24
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_cull.c (renamed from src/gallium/auxiliary/draw/draw_cull.c)31
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_flatshade.c (renamed from src/gallium/auxiliary/draw/draw_flatshade.c)23
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_offset.c (renamed from src/gallium/auxiliary/draw/draw_offset.c)26
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c (renamed from src/gallium/auxiliary/draw/draw_pstipple.c)84
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c (renamed from src/gallium/auxiliary/draw/draw_stipple.c)18
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_twoside.c (renamed from src/gallium/auxiliary/draw/draw_twoside.c)34
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_unfilled.c (renamed from src/gallium/auxiliary/draw/draw_unfilled.c)29
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_util.c137
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c (renamed from src/gallium/auxiliary/draw/draw_validate.c)41
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c (renamed from src/gallium/auxiliary/draw/draw_vbuf.c)339
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c (renamed from src/gallium/auxiliary/draw/draw_wide_line.c)18
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c (renamed from src/gallium/auxiliary/draw/draw_wide_point.c)43
-rw-r--r--src/gallium/auxiliary/draw/draw_prim.c523
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h337
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c225
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h102
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c252
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c223
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c215
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c326
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c269
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c215
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c22
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.c54
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_cache.c219
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_fetch.c528
-rw-r--r--src/gallium/auxiliary/draw/draw_vf.c378
-rw-r--r--src/gallium/auxiliary/draw/draw_vf.h232
-rw-r--r--src/gallium/auxiliary/draw/draw_vf_generic.c585
-rw-r--r--src/gallium/auxiliary/draw/draw_vf_sse.c613
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c (renamed from src/gallium/auxiliary/draw/draw_vertex_shader.c)50
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h60
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c200
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c138
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c178
-rw-r--r--src/gallium/auxiliary/draw/draw_wide_prims.c366
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile8
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_builtins.cpp708
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.cpp53
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.h4
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp86
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.h4
-rw-r--r--src/gallium/auxiliary/gallivm/llvm_builtins.c2
-rw-r--r--src/gallium/auxiliary/gallivm/soabuiltins.c2
-rw-r--r--src/gallium/auxiliary/gallivm/storage.cpp32
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.cpp10
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp4
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h11
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c18
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h22
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c67
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c308
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c23
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c339
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h4
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.c88
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.h2
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.c610
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.h2
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_dump.c334
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_dump.h10
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_parse.h2
-rw-r--r--src/gallium/auxiliary/translate/Makefile14
-rw-r--r--src/gallium/auxiliary/translate/SConscript11
-rw-r--r--src/gallium/auxiliary/translate/translate.c (renamed from src/gallium/auxiliary/draw/draw_debug.c)87
-rw-r--r--src/gallium/auxiliary/translate/translate.h108
-rw-r--r--src/gallium/auxiliary/translate/translate_generic.c676
-rw-r--r--src/gallium/auxiliary/translate/translate_sse.c625
-rw-r--r--src/gallium/auxiliary/util/p_debug.c60
-rw-r--r--src/gallium/auxiliary/util/u_blit.c20
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c47
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h39
-rw-r--r--src/gallium/auxiliary/util/u_time.c8
-rw-r--r--src/gallium/auxiliary/util/u_time.h6
90 files changed, 5272 insertions, 7526 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 18acab09674..63464e07058 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -290,6 +290,8 @@ void * cso_take_state(struct cso_cache *sc,
struct cso_cache *cso_cache_create(void)
{
struct cso_cache *sc = MALLOC_STRUCT(cso_cache);
+ if (sc == NULL)
+ return NULL;
sc->max_size = 4096;
sc->blend_hash = cso_hash_create();
@@ -332,10 +334,10 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
iter = cso_hash_first_node(hash);
while (!cso_hash_iter_is_null(iter)) {
void *state = cso_hash_iter_data(iter);
+ iter = cso_hash_iter_next(iter);
if (state) {
func(state, user_data);
}
- iter = cso_hash_iter_next(iter);
}
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 4a1a6cb79c2..0523cb19497 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -25,16 +25,20 @@
*
**************************************************************************/
- /* Wrap the cso cache & hash mechanisms in a simplified
+ /**
+ * @file
+ *
+ * Wrap the cso cache & hash mechanisms in a simplified
* pipe-driver-specific interface.
*
- * Authors:
- * Zack Rusin <[email protected]>
- * Keith Whitwell <[email protected]>
+ * @author Zack Rusin <[email protected]>
+ * @author Keith Whitwell <[email protected]>
*/
#include "pipe/p_state.h"
#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+#include "tgsi/util/tgsi_parse.h"
#include "cso_cache/cso_context.h"
#include "cso_cache/cso_cache.h"
@@ -135,8 +139,8 @@ void cso_destroy_context( struct cso_context *ctx )
* the data member of the cso to be the template itself.
*/
-void cso_set_blend(struct cso_context *ctx,
- const struct pipe_blend_state *templ)
+enum pipe_error cso_set_blend(struct cso_context *ctx,
+ const struct pipe_blend_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state));
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
@@ -146,6 +150,8 @@ void cso_set_blend(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
cso->state = *templ;
cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
@@ -153,6 +159,11 @@ void cso_set_blend(struct cso_context *ctx,
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -163,6 +174,7 @@ void cso_set_blend(struct cso_context *ctx,
ctx->blend = handle;
ctx->pipe->bind_blend_state(ctx->pipe, handle);
}
+ return PIPE_OK;
}
void cso_save_blend(struct cso_context *ctx)
@@ -182,12 +194,12 @@ void cso_restore_blend(struct cso_context *ctx)
-void cso_single_sampler(struct cso_context *ctx,
- unsigned idx,
- const struct pipe_sampler_state *templ)
+enum pipe_error cso_single_sampler(struct cso_context *ctx,
+ unsigned idx,
+ const struct pipe_sampler_state *templ)
{
void *handle = NULL;
-
+
if (templ != NULL) {
unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state));
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
@@ -196,13 +208,20 @@ void cso_single_sampler(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
-
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
cso->state = *templ;
cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -211,11 +230,12 @@ void cso_single_sampler(struct cso_context *ctx,
}
ctx->samplers[idx] = handle;
+ return PIPE_OK;
}
void cso_single_sampler_done( struct cso_context *ctx )
{
- unsigned i;
+ unsigned i;
/* find highest non-null sampler */
for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
@@ -226,8 +246,8 @@ void cso_single_sampler_done( struct cso_context *ctx )
ctx->nr_samplers = i;
if (ctx->hw.nr_samplers != ctx->nr_samplers ||
- memcmp(ctx->hw.samplers,
- ctx->samplers,
+ memcmp(ctx->hw.samplers,
+ ctx->samplers,
ctx->nr_samplers * sizeof(void *)) != 0)
{
memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *));
@@ -237,22 +257,36 @@ void cso_single_sampler_done( struct cso_context *ctx )
}
}
-void cso_set_samplers( struct cso_context *ctx,
- unsigned nr,
- const struct pipe_sampler_state **templates )
+/*
+ * If the function encounters any errors it will return the
+ * last one. Done to always try to set as many samplers
+ * as possible.
+ */
+enum pipe_error cso_set_samplers( struct cso_context *ctx,
+ unsigned nr,
+ const struct pipe_sampler_state **templates )
{
unsigned i;
-
+ enum pipe_error temp, error = PIPE_OK;
+
/* TODO: fastpath
*/
- for (i = 0; i < nr; i++)
- cso_single_sampler( ctx, i, templates[i] );
+ for (i = 0; i < nr; i++) {
+ temp = cso_single_sampler( ctx, i, templates[i] );
+ if (temp != PIPE_OK)
+ error = temp;
+ }
+
+ for ( ; i < ctx->nr_samplers; i++) {
+ temp = cso_single_sampler( ctx, i, NULL );
+ if (temp != PIPE_OK)
+ error = temp;
+ }
- for ( ; i < ctx->nr_samplers; i++)
- cso_single_sampler( ctx, i, NULL );
-
cso_single_sampler_done( ctx );
+
+ return error;
}
void cso_save_samplers(struct cso_context *ctx)
@@ -263,44 +297,64 @@ void cso_save_samplers(struct cso_context *ctx)
void cso_restore_samplers(struct cso_context *ctx)
{
- cso_set_samplers(ctx, ctx->nr_samplers_saved,
- (const struct pipe_sampler_state **) ctx->samplers_saved);
+ ctx->nr_samplers = ctx->nr_samplers_saved;
+ memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers));
+ cso_single_sampler_done( ctx );
}
-void cso_set_sampler_textures( struct cso_context *ctx,
- uint count,
- struct pipe_texture **textures )
+enum pipe_error cso_set_sampler_textures( struct cso_context *ctx,
+ uint count,
+ struct pipe_texture **textures )
{
uint i;
ctx->nr_textures = count;
for (i = 0; i < count; i++)
- ctx->textures[i] = textures[i];
+ pipe_texture_reference(&ctx->textures[i], textures[i]);
for ( ; i < PIPE_MAX_SAMPLERS; i++)
- ctx->textures[i] = NULL;
+ pipe_texture_reference(&ctx->textures[i], NULL);
ctx->pipe->set_sampler_textures(ctx->pipe, count, textures);
+
+ return PIPE_OK;
}
void cso_save_sampler_textures( struct cso_context *ctx )
{
+ uint i;
+
ctx->nr_textures_saved = ctx->nr_textures;
- memcpy(ctx->textures_saved, ctx->textures, sizeof(ctx->textures));
+ for (i = 0; i < ctx->nr_textures; i++) {
+ assert(!ctx->textures_saved[i]);
+ pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]);
+ }
}
void cso_restore_sampler_textures( struct cso_context *ctx )
{
- cso_set_sampler_textures(ctx, ctx->nr_textures_saved, ctx->textures_saved);
+ uint i;
+
+ ctx->nr_textures = ctx->nr_textures_saved;
+
+ for (i = 0; i < ctx->nr_textures; i++) {
+ pipe_texture_reference(&ctx->textures[i], NULL);
+ ctx->textures[i] = ctx->textures_saved[i];
+ ctx->textures_saved[i] = NULL;
+ }
+ for ( ; i < PIPE_MAX_SAMPLERS; i++)
+ pipe_texture_reference(&ctx->textures[i], NULL);
+
+ ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures);
+
ctx->nr_textures_saved = 0;
}
-
-void cso_set_depth_stencil_alpha(struct cso_context *ctx,
- const struct pipe_depth_stencil_alpha_state *templ)
+enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx,
+ const struct pipe_depth_stencil_alpha_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ,
sizeof(struct pipe_depth_stencil_alpha_state));
@@ -312,13 +366,20 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
cso->state = *templ;
cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
cso->context = ctx->pipe;
- cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso);
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -329,6 +390,7 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx,
ctx->depth_stencil = handle;
ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle);
}
+ return PIPE_OK;
}
void cso_save_depth_stencil_alpha(struct cso_context *ctx)
@@ -348,8 +410,8 @@ void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
-void cso_set_rasterizer(struct cso_context *ctx,
- const struct pipe_rasterizer_state *templ)
+enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
+ const struct pipe_rasterizer_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ,
sizeof(struct pipe_rasterizer_state));
@@ -360,13 +422,20 @@ void cso_set_rasterizer(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
cso->state = *templ;
cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state;
cso->context = ctx->pipe;
- cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
@@ -377,6 +446,7 @@ void cso_set_rasterizer(struct cso_context *ctx,
ctx->rasterizer = handle;
ctx->pipe->bind_rasterizer_state(ctx->pipe, handle);
}
+ return PIPE_OK;
}
void cso_save_rasterizer(struct cso_context *ctx)
@@ -394,37 +464,61 @@ void cso_restore_rasterizer(struct cso_context *ctx)
ctx->rasterizer_saved = NULL;
}
-
-void cso_set_fragment_shader(struct cso_context *ctx,
- const struct pipe_shader_state *templ)
+enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx,
+ void *handle )
{
- unsigned hash_key = cso_construct_key((void*)templ,
- sizeof(struct pipe_shader_state));
+ if (ctx->fragment_shader != handle) {
+ ctx->fragment_shader = handle;
+ ctx->pipe->bind_fs_state(ctx->pipe, handle);
+ }
+ return PIPE_OK;
+}
+
+
+/* Not really working:
+ */
+#if 0
+enum pipe_error cso_set_fragment_shader(struct cso_context *ctx,
+ const struct pipe_shader_state *templ)
+{
+ const struct tgsi_token *tokens = templ->tokens;
+ unsigned num_tokens = tgsi_num_tokens(tokens);
+ size_t tokens_size = num_tokens*sizeof(struct tgsi_token);
+ unsigned hash_key = cso_construct_key((void*)tokens, tokens_size);
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
- hash_key, CSO_FRAGMENT_SHADER,
- (void*)templ);
+ hash_key,
+ CSO_FRAGMENT_SHADER,
+ (void*)tokens);
void *handle = NULL;
if (cso_hash_iter_is_null(iter)) {
- struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader));
+ struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader) + tokens_size);
+ struct tgsi_token *cso_tokens = (struct tgsi_token *)((char *)cso + sizeof(*cso));
- cso->state = *templ;
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memcpy(cso_tokens, tokens, tokens_size);
+ cso->state.tokens = cso_tokens;
cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state;
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data;
}
- if (ctx->fragment_shader != handle) {
- ctx->fragment_shader = handle;
- ctx->pipe->bind_fs_state(ctx->pipe, handle);
- }
+ return cso_set_fragment_shader_handle( ctx, handle );
}
+#endif
void cso_save_fragment_shader(struct cso_context *ctx)
{
@@ -434,7 +528,6 @@ void cso_save_fragment_shader(struct cso_context *ctx)
void cso_restore_fragment_shader(struct cso_context *ctx)
{
- assert(ctx->fragment_shader_saved);
if (ctx->fragment_shader_saved != ctx->fragment_shader) {
ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
ctx->fragment_shader = ctx->fragment_shader_saved;
@@ -443,9 +536,22 @@ void cso_restore_fragment_shader(struct cso_context *ctx)
}
+enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx,
+ void *handle )
+{
+ if (ctx->vertex_shader != handle) {
+ ctx->vertex_shader = handle;
+ ctx->pipe->bind_vs_state(ctx->pipe, handle);
+ }
+ return PIPE_OK;
+}
+
-void cso_set_vertex_shader(struct cso_context *ctx,
- const struct pipe_shader_state *templ)
+/* Not really working:
+ */
+#if 0
+enum pipe_error cso_set_vertex_shader(struct cso_context *ctx,
+ const struct pipe_shader_state *templ)
{
unsigned hash_key = cso_construct_key((void*)templ,
sizeof(struct pipe_shader_state));
@@ -457,23 +563,31 @@ void cso_set_vertex_shader(struct cso_context *ctx,
if (cso_hash_iter_is_null(iter)) {
struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
cso->state = *templ;
cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state;
cso->context = ctx->pipe;
iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
handle = cso->data;
}
else {
handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data;
}
- if (ctx->vertex_shader != handle) {
- ctx->vertex_shader = handle;
- ctx->pipe->bind_vs_state(ctx->pipe, handle);
- }
+ return cso_set_vertex_shader_handle( ctx, handle );
}
+#endif
+
+
void cso_save_vertex_shader(struct cso_context *ctx)
{
@@ -483,9 +597,8 @@ void cso_save_vertex_shader(struct cso_context *ctx)
void cso_restore_vertex_shader(struct cso_context *ctx)
{
- assert(ctx->vertex_shader_saved);
if (ctx->vertex_shader_saved != ctx->vertex_shader) {
- ctx->pipe->bind_fs_state(ctx->pipe, ctx->vertex_shader_saved);
+ ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
ctx->vertex_shader = ctx->vertex_shader_saved;
}
ctx->vertex_shader_saved = NULL;
@@ -493,14 +606,15 @@ void cso_restore_vertex_shader(struct cso_context *ctx)
-void cso_set_framebuffer(struct cso_context *ctx,
- const struct pipe_framebuffer_state *fb)
+enum pipe_error cso_set_framebuffer(struct cso_context *ctx,
+ const struct pipe_framebuffer_state *fb)
{
/* XXX this memcmp() fails to detect buffer size changes */
if (1/*memcmp(&ctx->fb, fb, sizeof(*fb))*/) {
ctx->fb = *fb;
ctx->pipe->set_framebuffer_state(ctx->pipe, fb);
}
+ return PIPE_OK;
}
void cso_save_framebuffer(struct cso_context *ctx)
@@ -517,13 +631,14 @@ void cso_restore_framebuffer(struct cso_context *ctx)
}
-void cso_set_viewport(struct cso_context *ctx,
- const struct pipe_viewport_state *vp)
+enum pipe_error cso_set_viewport(struct cso_context *ctx,
+ const struct pipe_viewport_state *vp)
{
if (memcmp(&ctx->vp, vp, sizeof(*vp))) {
ctx->vp = *vp;
ctx->pipe->set_viewport_state(ctx->pipe, vp);
}
+ return PIPE_OK;
}
void cso_save_viewport(struct cso_context *ctx)
@@ -543,11 +658,12 @@ void cso_restore_viewport(struct cso_context *ctx)
-void cso_set_blend_color(struct cso_context *ctx,
- const struct pipe_blend_color *bc)
+enum pipe_error cso_set_blend_color(struct cso_context *ctx,
+ const struct pipe_blend_color *bc)
{
if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) {
ctx->blend_color = *bc;
ctx->pipe->set_blend_color(ctx->pipe, bc);
}
+ return PIPE_OK;
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 665e8d99110..0405944132b 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -31,6 +31,7 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
+#include "pipe/p_error.h"
#ifdef __cplusplus
@@ -45,47 +46,47 @@ void cso_destroy_context( struct cso_context *cso );
-void cso_set_blend( struct cso_context *cso,
- const struct pipe_blend_state *blend );
+enum pipe_error cso_set_blend( struct cso_context *cso,
+ const struct pipe_blend_state *blend );
void cso_save_blend(struct cso_context *cso);
void cso_restore_blend(struct cso_context *cso);
-void cso_set_depth_stencil_alpha( struct cso_context *cso,
- const struct pipe_depth_stencil_alpha_state *dsa );
+enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
+ const struct pipe_depth_stencil_alpha_state *dsa );
void cso_save_depth_stencil_alpha(struct cso_context *cso);
void cso_restore_depth_stencil_alpha(struct cso_context *cso);
-void cso_set_rasterizer( struct cso_context *cso,
- const struct pipe_rasterizer_state *rasterizer );
+enum pipe_error cso_set_rasterizer( struct cso_context *cso,
+ const struct pipe_rasterizer_state *rasterizer );
void cso_save_rasterizer(struct cso_context *cso);
void cso_restore_rasterizer(struct cso_context *cso);
-void cso_set_samplers( struct cso_context *cso,
- unsigned count,
- const struct pipe_sampler_state **states );
+enum pipe_error cso_set_samplers( struct cso_context *cso,
+ unsigned count,
+ const struct pipe_sampler_state **states );
void cso_save_samplers(struct cso_context *cso);
void cso_restore_samplers(struct cso_context *cso);
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
*/
-void cso_single_sampler( struct cso_context *cso,
- unsigned nr,
- const struct pipe_sampler_state *states );
+enum pipe_error cso_single_sampler( struct cso_context *cso,
+ unsigned nr,
+ const struct pipe_sampler_state *states );
void cso_single_sampler_done( struct cso_context *cso );
-void cso_set_sampler_textures( struct cso_context *cso,
- uint count,
- struct pipe_texture **textures );
+enum pipe_error cso_set_sampler_textures( struct cso_context *cso,
+ uint count,
+ struct pipe_texture **textures );
void cso_save_sampler_textures( struct cso_context *cso );
void cso_restore_sampler_textures( struct cso_context *cso );
@@ -96,34 +97,37 @@ void cso_restore_sampler_textures( struct cso_context *cso );
* (eg mesa's internall-generated texenv programs), it will be up to
* the state tracker to implement their own specialized caching.
*/
-void cso_set_fragment_shader( struct cso_context *cso,
- const struct pipe_shader_state *shader );
+enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx,
+ void *handle );
+enum pipe_error cso_set_fragment_shader( struct cso_context *cso,
+ const struct pipe_shader_state *shader );
void cso_save_fragment_shader(struct cso_context *cso);
void cso_restore_fragment_shader(struct cso_context *cso);
-
-void cso_set_vertex_shader( struct cso_context *cso,
- const struct pipe_shader_state *shader );
+enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx,
+ void *handle );
+enum pipe_error cso_set_vertex_shader( struct cso_context *cso,
+ const struct pipe_shader_state *shader );
void cso_save_vertex_shader(struct cso_context *cso);
void cso_restore_vertex_shader(struct cso_context *cso);
-void cso_set_framebuffer(struct cso_context *cso,
- const struct pipe_framebuffer_state *fb);
+enum pipe_error cso_set_framebuffer(struct cso_context *cso,
+ const struct pipe_framebuffer_state *fb);
void cso_save_framebuffer(struct cso_context *cso);
void cso_restore_framebuffer(struct cso_context *cso);
-void cso_set_viewport(struct cso_context *cso,
- const struct pipe_viewport_state *vp);
+enum pipe_error cso_set_viewport(struct cso_context *cso,
+ const struct pipe_viewport_state *vp);
void cso_save_viewport(struct cso_context *cso);
void cso_restore_viewport(struct cso_context *cso);
-void cso_set_blend_color(struct cso_context *cso,
- const struct pipe_blend_color *bc);
+enum pipe_error cso_set_blend_color(struct cso_context *cso,
+ const struct pipe_blend_color *bc);
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index ddce3822f7f..0646efd9527 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -110,6 +110,10 @@ cso_hash_create_node(struct cso_hash *hash,
struct cso_node **anextNode)
{
struct cso_node *node = cso_data_allocate_node(hash->data.d);
+
+ if (!node)
+ return NULL;
+
node->key = akey;
node->value = avalue;
@@ -219,15 +223,30 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash,
{
struct cso_node **nextNode = cso_hash_find_node(hash, key);
struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode);
- struct cso_hash_iter iter = {hash, node};
- return iter;
+ if (!node) {
+ struct cso_hash_iter null_iter = {hash, 0};
+ return null_iter;
+ }
+
+ {
+ struct cso_hash_iter iter = {hash, node};
+ return iter;
+ }
}
}
struct cso_hash * cso_hash_create(void)
{
struct cso_hash *hash = MALLOC_STRUCT(cso_hash);
+ if (!hash)
+ return NULL;
+
hash->data.d = MALLOC_STRUCT(cso_hash_data);
+ if (!hash->data.d) {
+ FREE(hash);
+ return NULL;
+ }
+
hash->data.d->fakeNext = 0;
hash->data.d->buckets = 0;
hash->data.d->size = 0;
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 5ab3cfe5ce9..bc6acfe4589 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -4,40 +4,36 @@ include $(TOP)/configs/current
LIBNAME = draw
C_SOURCES = \
- draw_aaline.c \
- draw_aapoint.c \
- draw_clip.c \
- draw_vs_exec.c \
- draw_vs_sse.c \
- draw_vs_llvm.c \
- draw_context.c\
- draw_cull.c \
- draw_debug.c \
- draw_flatshade.c \
- draw_offset.c \
+ draw_context.c \
+ draw_pipe.c \
+ draw_pipe_aaline.c \
+ draw_pipe_aapoint.c \
+ draw_pipe_clip.c \
+ draw_pipe_cull.c \
+ draw_pipe_flatshade.c \
+ draw_pipe_offset.c \
+ draw_pipe_pstipple.c \
+ draw_pipe_stipple.c \
+ draw_pipe_twoside.c \
+ draw_pipe_unfilled.c \
+ draw_pipe_util.c \
+ draw_pipe_validate.c \
+ draw_pipe_vbuf.c \
+ draw_pipe_wide_line.c \
+ draw_pipe_wide_point.c \
draw_pt.c \
- draw_pt_vcache.c \
+ draw_pt_elts.c \
+ draw_pt_emit.c \
+ draw_pt_fetch.c \
draw_pt_fetch_emit.c \
- draw_pt_fetch_pipeline.c \
draw_pt_fetch_shade_pipeline.c \
- draw_pt_pipeline.c \
- draw_pt_elts.c \
- draw_prim.c \
- draw_pstipple.c \
- draw_stipple.c \
- draw_twoside.c \
- draw_unfilled.c \
- draw_validate.c \
- draw_vbuf.c \
+ draw_pt_post_vs.c \
+ draw_pt_vcache.c \
draw_vertex.c \
- draw_vertex_cache.c \
- draw_vertex_fetch.c \
- draw_vertex_shader.c \
- draw_vf.c \
- draw_vf_generic.c \
- draw_vf_sse.c \
- draw_wide_line.c \
- draw_wide_point.c
+ draw_vs.c \
+ draw_vs_exec.c \
+ draw_vs_llvm.c \
+ draw_vs_sse.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index a7fb5dbd613..0b9852f633e 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -3,40 +3,36 @@ Import('*')
draw = env.ConvenienceLibrary(
target = 'draw',
source = [
- 'draw_aaline.c',
- 'draw_aapoint.c',
- 'draw_clip.c',
- 'draw_vs_exec.c',
- 'draw_vs_sse.c',
- 'draw_vs_llvm.c',
'draw_context.c',
- 'draw_cull.c',
- 'draw_debug.c',
- 'draw_flatshade.c',
- 'draw_offset.c',
+ 'draw_pipe.c',
+ 'draw_pipe_aaline.c',
+ 'draw_pipe_aapoint.c',
+ 'draw_pipe_clip.c',
+ 'draw_pipe_cull.c',
+ 'draw_pipe_flatshade.c',
+ 'draw_pipe_offset.c',
+ 'draw_pipe_pstipple.c',
+ 'draw_pipe_stipple.c',
+ 'draw_pipe_twoside.c',
+ 'draw_pipe_unfilled.c',
+ 'draw_pipe_util.c',
+ 'draw_pipe_validate.c',
+ 'draw_pipe_vbuf.c',
+ 'draw_pipe_wide_line.c',
+ 'draw_pipe_wide_point.c',
'draw_pt.c',
- 'draw_pt_vcache.c',
+ 'draw_pt_elts.c',
+ 'draw_pt_emit.c',
+ 'draw_pt_fetch.c',
'draw_pt_fetch_emit.c',
- 'draw_pt_fetch_pipeline.c',
'draw_pt_fetch_shade_pipeline.c',
- 'draw_pt_pipeline.c',
- 'draw_pt_elts.c',
- 'draw_prim.c',
- 'draw_pstipple.c',
- 'draw_stipple.c',
- 'draw_twoside.c',
- 'draw_unfilled.c',
- 'draw_validate.c',
- 'draw_vbuf.c',
+ 'draw_pt_post_vs.c',
+ 'draw_pt_vcache.c',
'draw_vertex.c',
- 'draw_vertex_cache.c',
- 'draw_vertex_fetch.c',
- 'draw_vertex_shader.c',
- 'draw_vf.c',
- 'draw_vf_generic.c',
- 'draw_vf_sse.c',
- 'draw_wide_point.c',
- 'draw_wide_line.c'
+ 'draw_vs.c',
+ 'draw_vs_exec.c',
+ 'draw_vs_llvm.c',
+ 'draw_vs_sse.c',
])
auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 0c314f6e1da..f90187816b5 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -33,8 +33,10 @@
#include "pipe/p_util.h"
#include "draw_context.h"
-#include "draw_private.h"
#include "draw_vbuf.h"
+#include "draw_vs.h"
+#include "draw_pt.h"
+#include "draw_pipe.h"
struct draw_context *draw_create( void )
@@ -43,40 +45,6 @@ struct draw_context *draw_create( void )
if (draw == NULL)
goto fail;
-#if defined(__i386__) || defined(__386__)
- draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL;
-#else
- draw->use_sse = FALSE;
-#endif
-
- draw->use_pt_shaders = GETENV( "GALLIUM_PT_SHADERS" ) != NULL;
-
- /* create pipeline stages */
- draw->pipeline.wide_line = draw_wide_line_stage( draw );
- draw->pipeline.wide_point = draw_wide_point_stage( draw );
- draw->pipeline.stipple = draw_stipple_stage( draw );
- draw->pipeline.unfilled = draw_unfilled_stage( draw );
- draw->pipeline.twoside = draw_twoside_stage( draw );
- draw->pipeline.offset = draw_offset_stage( draw );
- draw->pipeline.clip = draw_clip_stage( draw );
- draw->pipeline.flatshade = draw_flatshade_stage( draw );
- draw->pipeline.cull = draw_cull_stage( draw );
- draw->pipeline.validate = draw_validate_stage( draw );
- draw->pipeline.first = draw->pipeline.validate;
-
- if (!draw->pipeline.wide_line ||
- !draw->pipeline.wide_point ||
- !draw->pipeline.stipple ||
- !draw->pipeline.unfilled ||
- !draw->pipeline.twoside ||
- !draw->pipeline.offset ||
- !draw->pipeline.clip ||
- !draw->pipeline.flatshade ||
- !draw->pipeline.cull ||
- !draw->pipeline.validate)
- goto fail;
-
-
ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
@@ -85,28 +53,18 @@ struct draw_context *draw_create( void )
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
- /* Statically allocate maximum sized vertices for the cache - could be cleverer...
- */
- {
- char *tmp = align_malloc(VS_QUEUE_LENGTH * MAX_VERTEX_ALLOCATION, 16);
- if (!tmp)
- goto fail;
-
- draw->vs.vertex_cache = tmp;
- }
- draw->shader_queue_flush = draw_vertex_shader_queue_flush;
+ draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
- /* these defaults are oriented toward the needs of softpipe */
- draw->wide_point_threshold = 1000000.0; /* infinity */
- draw->wide_line_threshold = 1.0;
- draw->line_stipple = TRUE;
- draw->point_sprite = TRUE;
+ tgsi_exec_machine_init(&draw->machine);
- draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
+ /* FIXME: give this machine thing a proper constructor:
+ */
+ draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+ draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- draw_vertex_cache_invalidate( draw );
- draw_set_mapped_element_buffer( draw, 0, NULL );
+ if (!draw_pipeline_init( draw ))
+ goto fail;
if (!draw_pt_init( draw ))
goto fail;
@@ -124,39 +82,14 @@ void draw_destroy( struct draw_context *draw )
if (!draw)
return;
- if (draw->pipeline.wide_line)
- draw->pipeline.wide_line->destroy( draw->pipeline.wide_line );
- if (draw->pipeline.wide_point)
- draw->pipeline.wide_point->destroy( draw->pipeline.wide_point );
- if (draw->pipeline.stipple)
- draw->pipeline.stipple->destroy( draw->pipeline.stipple );
- if (draw->pipeline.unfilled)
- draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
- if (draw->pipeline.twoside)
- draw->pipeline.twoside->destroy( draw->pipeline.twoside );
- if (draw->pipeline.offset)
- draw->pipeline.offset->destroy( draw->pipeline.offset );
- if (draw->pipeline.clip)
- draw->pipeline.clip->destroy( draw->pipeline.clip );
- if (draw->pipeline.flatshade)
- draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
- if (draw->pipeline.cull)
- draw->pipeline.cull->destroy( draw->pipeline.cull );
- if (draw->pipeline.validate)
- draw->pipeline.validate->destroy( draw->pipeline.validate );
- if (draw->pipeline.aaline)
- draw->pipeline.aaline->destroy( draw->pipeline.aaline );
- if (draw->pipeline.aapoint)
- draw->pipeline.aapoint->destroy( draw->pipeline.aapoint );
- if (draw->pipeline.pstipple)
- draw->pipeline.pstipple->destroy( draw->pipeline.pstipple );
- if (draw->pipeline.rasterize)
- draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+
+ if (draw->machine.Inputs)
+ align_free(draw->machine.Inputs);
+
+ if (draw->machine.Outputs)
+ align_free(draw->machine.Outputs);
tgsi_exec_machine_free_data(&draw->machine);
-
- if (draw->vs.vertex_cache)
- align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */
/* Not so fast -- we're just borrowing this at the moment.
*
@@ -164,6 +97,7 @@ void draw_destroy( struct draw_context *draw )
draw->render->destroy( draw->render );
*/
+ draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
FREE( draw );
@@ -188,6 +122,20 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->rasterizer = raster;
+ draw->bypass_clipping =
+ ((draw->rasterizer && draw->rasterizer->bypass_clipping) ||
+ draw->driver.bypass_clipping);
+}
+
+
+/**
+ * Record the driver's request to bypass clipping and recompute the
+ * effective draw->bypass_clipping flag, which is the OR of the bound
+ * rasterizer's bypass_clipping and the driver's.
+ *
+ * The rasterizer pointer is tested before it is dereferenced, exactly as
+ * draw_set_rasterizer_state() does, so calling this before any rasterizer
+ * state has been bound does not dereference NULL.
+ *
+ * \param draw the drawing context
+ * \param bypass_clipping TRUE if the driver wants clipping bypassed
+ */
+void draw_set_driver_clipping( struct draw_context *draw,
+ boolean bypass_clipping )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->driver.bypass_clipping = bypass_clipping;
+ draw->bypass_clipping =
+ ((draw->rasterizer && draw->rasterizer->bypass_clipping) ||
+ draw->driver.bypass_clipping);
}
@@ -246,9 +194,8 @@ draw_set_vertex_buffers(struct draw_context *draw,
{
assert(count <= PIPE_MAX_ATTRIBS);
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
-
- memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0]));
+ draw->pt.nr_vertex_buffers = count;
}
@@ -259,9 +206,8 @@ draw_set_vertex_elements(struct draw_context *draw,
{
assert(count <= PIPE_MAX_ATTRIBS);
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
-
- memcpy(draw->vertex_element, elements, count * sizeof(elements[0]));
+ memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0]));
+ draw->pt.nr_vertex_elements = count;
}
@@ -272,8 +218,7 @@ void
draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer)
{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- draw->user.vbuffer[attr] = buffer;
+ draw->pt.user.vbuffer[attr] = buffer;
}
@@ -281,8 +226,7 @@ void
draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer)
{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- draw->user.constants = buffer;
+ draw->pt.user.constants = buffer;
}
@@ -294,7 +238,7 @@ void
draw_wide_point_threshold(struct draw_context *draw, float threshold)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->wide_point_threshold = threshold;
+ draw->pipeline.wide_point_threshold = threshold;
}
@@ -306,7 +250,7 @@ void
draw_wide_line_threshold(struct draw_context *draw, float threshold)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->wide_line_threshold = threshold;
+ draw->pipeline.wide_line_threshold = threshold;
}
@@ -317,7 +261,7 @@ void
draw_enable_line_stipple(struct draw_context *draw, boolean enable)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->line_stipple = enable;
+ draw->pipeline.line_stipple = enable;
}
@@ -328,7 +272,7 @@ void
draw_enable_point_sprites(struct draw_context *draw, boolean enable)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->point_sprite = enable;
+ draw->pipeline.point_sprite = enable;
}
@@ -383,79 +327,54 @@ draw_num_vs_outputs(struct draw_context *draw)
}
-/**
- * Allocate space for temporary post-transform vertices, such as for clipping.
- */
-void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
-{
- assert(!stage->tmp);
-
- stage->nr_tmps = nr;
-
- if (nr) {
- ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
- unsigned i;
-
- stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
-
- for (i = 0; i < nr; i++)
- stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
- }
-}
-
-void draw_free_temp_verts( struct draw_stage *stage )
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render )
{
- if (stage->tmp) {
- FREE( stage->tmp[0] );
- FREE( stage->tmp );
- stage->tmp = NULL;
- }
+ draw->render = render;
}
-
-boolean draw_use_sse(struct draw_context *draw)
+void draw_set_edgeflags( struct draw_context *draw,
+ const unsigned *edgeflag )
{
- return (boolean) draw->use_sse;
+ draw->pt.user.edgeflag = edgeflag;
}
-void draw_reset_vertex_ids(struct draw_context *draw)
-{
- struct draw_stage *stage = draw->pipeline.first;
-
- while (stage) {
- unsigned i;
-
- for (i = 0; i < stage->nr_tmps; i++)
- stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
- stage = stage->next;
- }
- draw_vertex_cache_reset_vertex_ids(draw); /* going away soon */
- draw_pt_reset_vertex_ids(draw);
-}
-
-
-void draw_set_render( struct draw_context *draw,
- struct vbuf_render *render )
+/**
+ * Tell the drawing context about the index/element buffer to use
+ * (ala glDrawElements)
+ * If no element buffer is to be used (i.e. glDrawArrays) then this
+ * should be called with eltSize=0 and elements=NULL.
+ *
+ * \param draw the drawing context
+ * \param eltSize size of each element (1, 2 or 4 bytes)
+ * \param elements the element buffer ptr
+ */
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize, void *elements )
{
- draw->render = render;
+ draw->pt.user.elts = elements;
+ draw->pt.user.eltSize = eltSize;
}
-void draw_set_edgeflags( struct draw_context *draw,
- const unsigned *edgeflag )
+
+
+/* Revamp me please:
+ */
+void draw_do_flush( struct draw_context *draw, unsigned flags )
{
- draw->user.edgeflag = edgeflag;
-}
+ if (!draw->flushing && !draw->vcache_flushing)
+ {
+ draw->flushing = TRUE;
+ draw_pipeline_flush( draw, flags );
-boolean draw_get_edgeflag( struct draw_context *draw,
- unsigned idx )
-{
- if (draw->user.edgeflag)
- return (draw->user.edgeflag[idx/32] & (1 << (idx%32))) != 0;
- else
- return 1;
+ draw->reduced_prim = ~0; /* is reduced_prim needed any more? */
+
+ draw->flushing = FALSE;
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index c7ac32b4522..c5c3d3b09e0 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -42,37 +42,11 @@
struct pipe_context;
-struct vertex_buffer;
-struct vertex_info;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
-/**
- * Clipmask flags
- */
-/*@{*/
-#define CLIP_RIGHT_BIT 0x01
-#define CLIP_LEFT_BIT 0x02
-#define CLIP_TOP_BIT 0x04
-#define CLIP_BOTTOM_BIT 0x08
-#define CLIP_NEAR_BIT 0x10
-#define CLIP_FAR_BIT 0x20
-/*@}*/
-
-/**
- * Bitshift for each clip flag
- */
-/*@{*/
-#define CLIP_RIGHT_SHIFT 0
-#define CLIP_LEFT_SHIFT 1
-#define CLIP_TOP_SHIFT 2
-#define CLIP_BOTTOM_SHIFT 3
-#define CLIP_NEAR_SHIFT 4
-#define CLIP_FAR_SHIFT 5
-/*@}*/
-
struct draw_context *draw_create( void );
@@ -99,15 +73,13 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable);
void draw_enable_point_sprites(struct draw_context *draw, boolean enable);
-boolean draw_use_sse(struct draw_context *draw);
-
-void
+boolean
draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe);
-void
+boolean
draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe);
-void
+boolean
draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe);
@@ -168,17 +140,24 @@ void draw_arrays(struct draw_context *draw, unsigned prim,
void draw_flush(struct draw_context *draw);
-/***********************************************************************
- * draw_debug.c
+
+/*******************************************************************************
+ * Driver backend interface
*/
-boolean draw_validate_prim( unsigned prim, unsigned length );
-unsigned draw_trim_prim( unsigned mode, unsigned count );
+struct vbuf_render;
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render );
+void draw_set_driver_clipping( struct draw_context *draw,
+ boolean bypass_clipping );
+/*******************************************************************************
+ * Draw pipeline
+ */
+boolean draw_need_pipeline(const struct draw_context *draw,
+ const struct pipe_rasterizer_state *rasterizer,
+ unsigned prim );
-struct vbuf_render;
-void draw_set_render( struct draw_context *draw,
- struct vbuf_render *render );
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pipe.c
index e70e63d08fb..d0890203a5e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -31,16 +31,86 @@
*/
#include "pipe/p_util.h"
-#include "draw/draw_context.h"
#include "draw/draw_private.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_pt.h"
+#include "draw/draw_pipe.h"
+
+
+
+boolean draw_pipeline_init( struct draw_context *draw ) /* Create the built-in pipeline stages and set default thresholds; returns FALSE if any stage constructor returned NULL. */
+{
+ /* create pipeline stages */
+ draw->pipeline.wide_line = draw_wide_line_stage( draw );
+ draw->pipeline.wide_point = draw_wide_point_stage( draw );
+ draw->pipeline.stipple = draw_stipple_stage( draw );
+ draw->pipeline.unfilled = draw_unfilled_stage( draw );
+ draw->pipeline.twoside = draw_twoside_stage( draw );
+ draw->pipeline.offset = draw_offset_stage( draw );
+ draw->pipeline.clip = draw_clip_stage( draw );
+ draw->pipeline.flatshade = draw_flatshade_stage( draw );
+ draw->pipeline.cull = draw_cull_stage( draw );
+ draw->pipeline.validate = draw_validate_stage( draw );
+ draw->pipeline.first = draw->pipeline.validate; /* primitives enter the pipeline through the validate stage */
+
+ if (!draw->pipeline.wide_line ||
+ !draw->pipeline.wide_point ||
+ !draw->pipeline.stipple ||
+ !draw->pipeline.unfilled ||
+ !draw->pipeline.twoside ||
+ !draw->pipeline.offset ||
+ !draw->pipeline.clip ||
+ !draw->pipeline.flatshade ||
+ !draw->pipeline.cull ||
+ !draw->pipeline.validate)
+ return FALSE; /* no cleanup here: stages already created stay in draw->pipeline */
+
+ /* these defaults are oriented toward the needs of softpipe */
+ draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */
+ draw->pipeline.wide_line_threshold = 1.0;
+ draw->pipeline.line_stipple = TRUE;
+ draw->pipeline.point_sprite = TRUE;
+
+ return TRUE;
+}
+
+
+void draw_pipeline_destroy( struct draw_context *draw ) /* Call destroy() on every pipeline stage slot that is non-NULL. */
+{
+ if (draw->pipeline.wide_line)
+ draw->pipeline.wide_line->destroy( draw->pipeline.wide_line );
+ if (draw->pipeline.wide_point)
+ draw->pipeline.wide_point->destroy( draw->pipeline.wide_point );
+ if (draw->pipeline.stipple)
+ draw->pipeline.stipple->destroy( draw->pipeline.stipple );
+ if (draw->pipeline.unfilled)
+ draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
+ if (draw->pipeline.twoside)
+ draw->pipeline.twoside->destroy( draw->pipeline.twoside );
+ if (draw->pipeline.offset)
+ draw->pipeline.offset->destroy( draw->pipeline.offset );
+ if (draw->pipeline.clip)
+ draw->pipeline.clip->destroy( draw->pipeline.clip );
+ if (draw->pipeline.flatshade)
+ draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
+ if (draw->pipeline.cull)
+ draw->pipeline.cull->destroy( draw->pipeline.cull );
+ if (draw->pipeline.validate)
+ draw->pipeline.validate->destroy( draw->pipeline.validate );
+ if (draw->pipeline.aaline) /* aaline, aapoint, pstipple and rasterize are not created by draw_pipeline_init() */
+ draw->pipeline.aaline->destroy( draw->pipeline.aaline );
+ if (draw->pipeline.aapoint)
+ draw->pipeline.aapoint->destroy( draw->pipeline.aapoint );
+ if (draw->pipeline.pstipple)
+ draw->pipeline.pstipple->destroy( draw->pipeline.pstipple );
+ if (draw->pipeline.rasterize)
+ draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+}
+
+
+
+
+
-/**
- * Add a point to the primitive queue.
- * \param i0 index into user's vertex arrays
- */
static void do_point( struct draw_context *draw,
const char *v0 )
{
@@ -55,11 +125,6 @@ static void do_point( struct draw_context *draw,
}
-/**
- * Add a line to the primitive queue.
- * \param i0 index into user's vertex arrays
- * \param i1 index into user's vertex arrays
- */
static void do_line( struct draw_context *draw,
const char *v0,
const char *v1 )
@@ -75,9 +140,7 @@ static void do_line( struct draw_context *draw,
draw->pipeline.first->line( draw->pipeline.first, &prim );
}
-/**
- * Add a triangle to the primitive queue.
- */
+
static void do_triangle( struct draw_context *draw,
char *v0,
char *v1,
@@ -94,28 +157,11 @@ static void do_triangle( struct draw_context *draw,
(prim.v[2]->edgeflag << 2));
prim.pad = 0;
- if (0) debug_printf("tri ef: %d %d %d\n",
- prim.v[0]->edgeflag,
- prim.v[1]->edgeflag,
- prim.v[2]->edgeflag);
-
draw->pipeline.first->tri( draw->pipeline.first, &prim );
}
-void draw_pt_reset_vertex_ids( struct draw_context *draw )
-{
- unsigned i;
- char *verts = draw->pt.pipeline.verts;
- unsigned stride = draw->pt.pipeline.vertex_stride;
-
- for (i = 0; i < draw->pt.pipeline.vertex_count; i++) {
- ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID;
- verts += stride;
- }
-}
-
/* Code to run the pipeline on a fairly arbitrary collection of vertices.
*
@@ -127,19 +173,20 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw )
* This code provides a callback to reset the vertex id's which the
* draw_vbuf.c code uses when it has to perform a flush.
*/
-void draw_pt_run_pipeline( struct draw_context *draw,
- unsigned prim,
- char *verts,
- unsigned stride,
- unsigned vertex_count,
- const ushort *elts,
- unsigned count )
+void draw_pipeline_run( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count )
{
+ char *verts = (char *)vertices;
unsigned i;
- draw->pt.pipeline.verts = verts;
- draw->pt.pipeline.vertex_stride = stride;
- draw->pt.pipeline.vertex_count = vertex_count;
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = stride;
+ draw->pipeline.vertex_count = vertex_count;
switch (prim) {
case PIPE_PRIM_POINTS:
@@ -162,7 +209,15 @@ void draw_pt_run_pipeline( struct draw_context *draw,
break;
}
- draw->pt.pipeline.verts = NULL;
- draw->pt.pipeline.vertex_count = 0;
+ draw->pipeline.verts = NULL;
+ draw->pipeline.vertex_count = 0;
}
+
+
+void draw_pipeline_flush( struct draw_context *draw, /* Flush the pipeline via its current first stage, passing flags through. */
+ unsigned flags )
+{
+ draw->pipeline.first->flush( draw->pipeline.first, flags );
+ draw->pipeline.first = draw->pipeline.validate; /* reset the entry point to the validate stage */
+}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
new file mode 100644
index 00000000000..2476abb2b23
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#ifndef DRAW_PIPE_H
+#define DRAW_PIPE_H
+
+#include "pipe/p_compiler.h"
+#include "draw_private.h" /* for sizeof(vertex_header) */
+
+
+
+/**
+ * Base class for all primitive drawing stages.
+ */
+struct draw_stage
+{
+ struct draw_context *draw; /**< parent context */
+
+ struct draw_stage *next; /**< next stage in pipeline */
+
+ struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
+ unsigned nr_tmps; /**< presumably the number of entries in tmp[] -- confirm against draw_alloc_temp_verts() */
+
+ void (*point)( struct draw_stage *,
+ struct prim_header * ); /**< per-point callback */
+
+ void (*line)( struct draw_stage *,
+ struct prim_header * ); /**< per-line callback */
+
+ void (*tri)( struct draw_stage *,
+ struct prim_header * ); /**< per-triangle callback */
+
+ void (*flush)( struct draw_stage *,
+ unsigned flags ); /**< flush callback; receives the flush flags */
+
+ void (*reset_stipple_counter)( struct draw_stage * );
+
+ void (*destroy)( struct draw_stage * ); /**< destructor for the stage */
+};
+
+
+extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
+extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
+extern struct draw_stage *draw_offset_stage( struct draw_context *context );
+extern struct draw_stage *draw_clip_stage( struct draw_context *context );
+extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
+extern struct draw_stage *draw_cull_stage( struct draw_context *context );
+extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_point_stage( struct draw_context *context );
+extern struct draw_stage *draw_validate_stage( struct draw_context *context );
+
+
+extern void draw_free_temp_verts( struct draw_stage *stage );
+extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
+
+extern void draw_reset_vertex_ids( struct draw_context *draw );
+
+void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header);
+void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header);
+void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header);
+
+
+
+/**
+ * Get a writeable copy of a vertex.
+ * \param stage drawing stage info
+ * \param vert the vertex to copy (source)
+ * \param idx index into stage's tmp[] array to put the copy (dest)
+ * \return pointer to the copied vertex
+ */
+static INLINE struct vertex_header *
+dup_vert( struct draw_stage *stage,
+ const struct vertex_header *vert,
+ unsigned idx )
+{
+ struct vertex_header *tmp = stage->tmp[idx]; /* idx is not range-checked against stage->nr_tmps */
+ const uint vsize = sizeof(struct vertex_header)
+ + stage->draw->num_vs_outputs * 4 * sizeof(float); /* header plus four floats per vertex-shader output */
+ memcpy(tmp, vert, vsize);
+ tmp->vertex_id = UNDEFINED_VERTEX_ID; /* the copy does not inherit the source vertex's id */
+ return tmp;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index e8d2a45102c..7e5f8bd2819 100644
--- a/src/gallium/auxiliary/draw/draw_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -43,6 +43,7 @@
#include "draw_context.h"
#include "draw_private.h"
+#include "draw_pipe.h"
/**
@@ -333,11 +334,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
* Generate the frag shader we'll use for drawing AA lines.
* This will be the user's shader plus some texture/modulate instructions.
*/
-static void
+static boolean
generate_aaline_fs(struct aaline_stage *aaline)
{
const struct pipe_shader_state *orig_fs = &aaline->fs->state;
- //struct draw_context *draw = aaline->stage.draw;
struct pipe_shader_state aaline_fs;
struct aa_transform_context transform;
@@ -345,6 +345,8 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline_fs = *orig_fs; /* copy to init */
aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (aaline_fs.tokens == NULL)
+ return FALSE;
memset(&transform, 0, sizeof(transform));
transform.colorOutput = -1;
@@ -369,15 +371,18 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline->fs->aaline_fs
= aaline->driver_create_fs_state(aaline->pipe, &aaline_fs);
+ if (aaline->fs->aaline_fs == NULL)
+ return FALSE;
aaline->fs->generic_attrib = transform.maxGeneric + 1;
+ return TRUE;
}
/**
* Create the texture map we'll use for antialiasing the lines.
*/
-static void
+static boolean
aaline_create_texture(struct aaline_stage *aaline)
{
struct pipe_context *pipe = aaline->pipe;
@@ -395,6 +400,8 @@ aaline_create_texture(struct aaline_stage *aaline)
texTemp.cpp = 1;
aaline->texture = screen->texture_create(screen, &texTemp);
+ if (!aaline->texture)
+ return FALSE;
/* Fill in mipmap images.
* Basically each level is solid opaque, except for the outermost
@@ -410,6 +417,8 @@ aaline_create_texture(struct aaline_stage *aaline)
surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0);
data = pipe_surface_map(surface);
+ if (data == NULL)
+ return FALSE;
for (i = 0; i < size; i++) {
for (j = 0; j < size; j++) {
@@ -435,6 +444,7 @@ aaline_create_texture(struct aaline_stage *aaline)
pipe_surface_reference(&surface, NULL);
pipe->texture_update(pipe, aaline->texture, 0, (1 << level));
}
+ return TRUE;
}
@@ -443,7 +453,7 @@ aaline_create_texture(struct aaline_stage *aaline)
* By using a mipmapped texture, we don't have to generate a different
* texture image for each line size.
*/
-static void
+static boolean
aaline_create_sampler(struct aaline_stage *aaline)
{
struct pipe_sampler_state sampler;
@@ -461,6 +471,10 @@ aaline_create_sampler(struct aaline_stage *aaline)
sampler.max_lod = MAX_TEXTURE_LEVEL;
aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+ if (aaline->sampler_cso == NULL)
+ return FALSE;
+
+ return TRUE;
}
@@ -468,13 +482,15 @@ aaline_create_sampler(struct aaline_stage *aaline)
* When we're about to draw our first AA line in a batch, this function is
* called to tell the driver to bind our modified fragment shader.
*/
-static void
+static boolean
bind_aaline_fragment_shader(struct aaline_stage *aaline)
{
- if (!aaline->fs->aaline_fs) {
- generate_aaline_fs(aaline);
- }
+ if (!aaline->fs->aaline_fs &&
+ !generate_aaline_fs(aaline))
+ return FALSE;
+
aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs);
+ return TRUE;
}
@@ -486,20 +502,6 @@ aaline_stage( struct draw_stage *stage )
}
-static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point(stage->next, header);
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
-
-
/**
* Draw a wide line by drawing a quad, using geometry which will
* fulfill GL's antialiased line requirements.
@@ -637,7 +639,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
/*
* Bind (generate) our fragprog, sampler and texture
*/
- bind_aaline_fragment_shader(aaline);
+ if (!bind_aaline_fragment_shader(aaline)) {
+ stage->line = draw_pipe_passthrough_line;
+ stage->line(stage, header);
+ return;
+ }
/* update vertex attrib info */
aaline->tex_slot = draw->num_vs_outputs;
@@ -701,9 +707,11 @@ aaline_destroy(struct draw_stage *stage)
{
struct aaline_stage *aaline = aaline_stage(stage);
- aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso);
+ if (aaline->sampler_cso)
+ aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso);
- pipe_texture_release(&aaline->texture);
+ if (aaline->texture)
+ pipe_texture_release(&aaline->texture);
draw_free_temp_verts( stage );
@@ -715,19 +723,28 @@ static struct aaline_stage *
draw_aaline_stage(struct draw_context *draw)
{
struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);
+ if (aaline == NULL)
+ return NULL;
- draw_alloc_temp_verts( &aaline->stage, 8 );
+ if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
+ goto fail;
aaline->stage.draw = draw;
aaline->stage.next = NULL;
- aaline->stage.point = passthrough_point;
+ aaline->stage.point = draw_pipe_passthrough_point;
aaline->stage.line = aaline_first_line;
- aaline->stage.tri = passthrough_tri;
+ aaline->stage.tri = draw_pipe_passthrough_tri;
aaline->stage.flush = aaline_flush;
aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
aaline->stage.destroy = aaline_destroy;
return aaline;
+
+ fail:
+ if (aaline)
+ aaline_destroy(&aaline->stage);
+
+ return NULL;
}
@@ -749,13 +766,13 @@ aaline_create_fs_state(struct pipe_context *pipe,
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader);
+ if (aafs == NULL)
+ return NULL;
- if (aafs) {
- aafs->state = *fs;
+ aafs->state = *fs;
- /* pass-through */
- aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
- }
+ /* pass-through */
+ aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
return aafs;
}
@@ -821,7 +838,7 @@ aaline_set_sampler_textures(struct pipe_context *pipe,
* into the draw module's pipeline. This will not be used if the
* hardware has native support for AA lines.
*/
-void
+boolean
draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
{
struct aaline_stage *aaline;
@@ -832,14 +849,17 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
* Create / install AA line drawing / prim stage
*/
aaline = draw_aaline_stage( draw );
- assert(aaline);
- draw->pipeline.aaline = &aaline->stage;
+ if (!aaline)
+ goto fail;
aaline->pipe = pipe;
/* create special texture, sampler state */
- aaline_create_texture(aaline);
- aaline_create_sampler(aaline);
+ if (!aaline_create_texture(aaline))
+ goto fail;
+
+ if (!aaline_create_sampler(aaline))
+ goto fail;
/* save original driver functions */
aaline->driver_create_fs_state = pipe->create_fs_state;
@@ -856,4 +876,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
pipe->bind_sampler_states = aaline_bind_sampler_states;
pipe->set_sampler_textures = aaline_set_sampler_textures;
+
+ /* Install once everything is known to be OK:
+ */
+ draw->pipeline.aaline = &aaline->stage;
+
+ return TRUE;
+
+ fail:
+ if (aaline)
+ aaline->stage.destroy( &aaline->stage );
+
+ return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index fcebe3e7a0c..ac0aa4cd7ce 100644
--- a/src/gallium/auxiliary/draw/draw_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -48,7 +48,8 @@
#include "tgsi/util/tgsi_dump.h"
#include "draw_context.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
/*
@@ -483,7 +484,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
* Generate the frag shader we'll use for drawing AA points.
* This will be the user's shader plus some texture/modulate instructions.
*/
-static void
+static boolean
generate_aapoint_fs(struct aapoint_stage *aapoint)
{
const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
@@ -494,6 +495,8 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
aapoint_fs = *orig_fs; /* copy to init */
aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (aapoint_fs.tokens == NULL)
+ return FALSE;
memset(&transform, 0, sizeof(transform));
transform.colorOutput = -1;
@@ -518,8 +521,12 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
aapoint->fs->aapoint_fs
= aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
+ if (aapoint->fs->aapoint_fs == NULL)
+ return FALSE;
aapoint->fs->generic_attrib = transform.maxGeneric + 1;
+
+ return TRUE;
}
@@ -527,13 +534,15 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
* When we're about to draw our first AA point in a batch, this function is
* called to tell the driver to bind our modified fragment shader.
*/
-static void
+static boolean
bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
{
- if (!aapoint->fs->aapoint_fs) {
- generate_aapoint_fs(aapoint);
- }
+ if (!aapoint->fs->aapoint_fs &&
+ !generate_aapoint_fs(aapoint))
+ return FALSE;
+
aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
+ return TRUE;
}
@@ -545,18 +554,6 @@ aapoint_stage( struct draw_stage *stage )
}
-static void
-passthrough_line(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->line(stage->next, header);
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
/**
@@ -742,19 +739,29 @@ static struct aapoint_stage *
draw_aapoint_stage(struct draw_context *draw)
{
struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
+ if (aapoint == NULL)
+ goto fail;
- draw_alloc_temp_verts( &aapoint->stage, 4 );
+ if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
+ goto fail;
aapoint->stage.draw = draw;
aapoint->stage.next = NULL;
aapoint->stage.point = aapoint_first_point;
- aapoint->stage.line = passthrough_line;
- aapoint->stage.tri = passthrough_tri;
+ aapoint->stage.line = draw_pipe_passthrough_line;
+ aapoint->stage.tri = draw_pipe_passthrough_tri;
aapoint->stage.flush = aapoint_flush;
aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
aapoint->stage.destroy = aapoint_destroy;
return aapoint;
+
+ fail:
+ if (aapoint)
+ aapoint_destroy(&aapoint->stage);
+
+ return NULL;
+
}
@@ -776,13 +783,13 @@ aapoint_create_fs_state(struct pipe_context *pipe,
{
struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
+ if (aafs == NULL)
+ return NULL;
- if (aafs) {
- aafs->state = *fs;
+ aafs->state = *fs;
- /* pass-through */
- aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
- }
+ /* pass-through */
+ aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
return aafs;
}
@@ -817,7 +824,7 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
* into the draw module's pipeline. This will not be used if the
* hardware has native support for AA points.
*/
-void
+boolean
draw_install_aapoint_stage(struct draw_context *draw,
struct pipe_context *pipe)
{
@@ -829,8 +836,8 @@ draw_install_aapoint_stage(struct draw_context *draw,
* Create / install AA point drawing / prim stage
*/
aapoint = draw_aapoint_stage( draw );
- assert(aapoint);
- draw->pipeline.aapoint = &aapoint->stage;
+ if (aapoint == NULL)
+ goto fail;
aapoint->pipe = pipe;
@@ -843,4 +850,14 @@ draw_install_aapoint_stage(struct draw_context *draw,
pipe->create_fs_state = aapoint_create_fs_state;
pipe->bind_fs_state = aapoint_bind_fs_state;
pipe->delete_fs_state = aapoint_delete_fs_state;
+
+ draw->pipeline.aapoint = &aapoint->stage;
+
+ return TRUE;
+
+ fail:
+ if (aapoint)
+ aapoint->stage.destroy( &aapoint->stage );
+
+ return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index e24c5d80322..21216addeab 100644
--- a/src/gallium/auxiliary/draw/draw_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -35,8 +35,8 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_context.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
#ifndef IS_NEGATIVE
@@ -204,7 +204,12 @@ static void emit_poly( struct draw_stage *stage,
}
}
-
+/* Four-component dot product of a and b. */
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
/* Clip a triangle against the viewport and user clip planes.
@@ -486,8 +493,11 @@ static void clip_destroy( struct draw_stage *stage )
struct draw_stage *draw_clip_stage( struct draw_context *draw )
{
struct clipper *clipper = CALLOC_STRUCT(clipper);
+ if (clipper == NULL)
+ goto fail;
- draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 );
+ if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
+ goto fail;
clipper->stage.draw = draw;
clipper->stage.point = clip_point;
@@ -500,4 +510,13 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
clipper->plane = draw->plane;
return &clipper->stage;
+
+ fail:
+ /* Every jump to this label happens before stage.destroy is assigned,
+ * so tear the stage down by calling clip_destroy directly.
+ */
+ if (clipper)
+ clip_destroy( &clipper->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index 8177b0ac86e..87aaf1f85bd 100644
--- a/src/gallium/auxiliary/draw/draw_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -35,7 +35,7 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
struct cull_stage {
@@ -95,20 +95,6 @@ static void cull_first_tri( struct draw_stage *stage,
-static void cull_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void cull_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
static void cull_flush( struct draw_stage *stage, unsigned flags )
{
stage->tri = cull_first_tri;
@@ -134,17 +120,29 @@ static void cull_destroy( struct draw_stage *stage )
struct draw_stage *draw_cull_stage( struct draw_context *draw )
{
struct cull_stage *cull = CALLOC_STRUCT(cull_stage);
+ if (cull == NULL)
+ goto fail;
- draw_alloc_temp_verts( &cull->stage, 0 );
+ if (!draw_alloc_temp_verts( &cull->stage, 0 ))
+ goto fail;
cull->stage.draw = draw;
cull->stage.next = NULL;
- cull->stage.point = cull_point;
- cull->stage.line = cull_line;
+ cull->stage.point = draw_pipe_passthrough_point;
+ cull->stage.line = draw_pipe_passthrough_line;
cull->stage.tri = cull_first_tri;
cull->stage.flush = cull_flush;
cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
cull->stage.destroy = cull_destroy;
return &cull->stage;
+
+ fail:
+ /* Every jump to this label happens before stage.destroy is assigned,
+ * so tear the stage down by calling cull_destroy directly.
+ */
+ if (cull)
+ cull_destroy( &cull->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index af2cb05c989..205000cbea5 100644
--- a/src/gallium/auxiliary/draw/draw_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -30,7 +30,8 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
/** subclass of draw_stage */
@@ -151,13 +152,6 @@ static void flatshade_line_1( struct draw_stage *stage,
}
-/* Flatshade point -- passthrough.
- */
-static void flatshade_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
static void flatshade_init_state( struct draw_stage *stage )
@@ -230,12 +224,15 @@ static void flatshade_destroy( struct draw_stage *stage )
struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
{
struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
+ if (flatshade == NULL)
+ goto fail;
- draw_alloc_temp_verts( &flatshade->stage, 2 );
+ if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
+ goto fail;
flatshade->stage.draw = draw;
flatshade->stage.next = NULL;
- flatshade->stage.point = flatshade_point;
+ flatshade->stage.point = draw_pipe_passthrough_point;
flatshade->stage.line = flatshade_first_line;
flatshade->stage.tri = flatshade_first_tri;
flatshade->stage.flush = flatshade_flush;
@@ -243,6 +240,15 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
flatshade->stage.destroy = flatshade_destroy;
return &flatshade->stage;
+
+ fail:
+ /* Every jump to this label happens before stage.destroy is assigned,
+ * so tear the stage down by calling flatshade_destroy directly.
+ */
+ if (flatshade)
+ flatshade_destroy( &flatshade->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index dbc676deae4..ffec85ccdd8 100644
--- a/src/gallium/auxiliary/draw/draw_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -33,7 +33,7 @@
*/
#include "pipe/p_util.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
@@ -129,18 +129,6 @@ static void offset_first_tri( struct draw_stage *stage,
}
-static void offset_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void offset_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
static void offset_flush( struct draw_stage *stage,
@@ -170,17 +158,29 @@ static void offset_destroy( struct draw_stage *stage )
struct draw_stage *draw_offset_stage( struct draw_context *draw )
{
struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
+ if (offset == NULL)
+ goto fail;
- draw_alloc_temp_verts( &offset->stage, 3 );
+ if (!draw_alloc_temp_verts( &offset->stage, 3 ))
+ goto fail;
offset->stage.draw = draw;
offset->stage.next = NULL;
- offset->stage.point = offset_point;
- offset->stage.line = offset_line;
+ offset->stage.point = draw_pipe_passthrough_point;
+ offset->stage.line = draw_pipe_passthrough_line;
offset->stage.tri = offset_first_tri;
offset->stage.flush = offset_flush;
offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
offset->stage.destroy = offset_destroy;
return &offset->stage;
+
+ fail:
+ /* Every jump to this label happens before stage.destroy is assigned,
+ * so tear the stage down by calling offset_destroy directly.
+ */
+ if (offset)
+ offset_destroy( &offset->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 4dddb72906f..aec485a6e72 100644
--- a/src/gallium/auxiliary/draw/draw_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -44,7 +44,7 @@
#include "tgsi/util/tgsi_dump.h"
#include "draw_context.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
@@ -320,7 +320,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
* Generate the frag shader we'll use for doing polygon stipple.
* This will be the user's shader prefixed with a TEX and KIL instruction.
*/
-static void
+static boolean
generate_pstip_fs(struct pstip_stage *pstip)
{
const struct pipe_shader_state *orig_fs = &pstip->fs->state;
@@ -332,6 +332,8 @@ generate_pstip_fs(struct pstip_stage *pstip)
pstip_fs = *orig_fs; /* copy to init */
pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+ if (pstip_fs.tokens == NULL)
+ return FALSE;
memset(&transform, 0, sizeof(transform));
transform.wincoordInput = -1;
@@ -355,6 +357,13 @@ generate_pstip_fs(struct pstip_stage *pstip)
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
+ /* The driver may fail to create the shader; report that to the caller
+ * rather than letting it bind a NULL shader.
+ */
+ if (pstip->fs->pstip_fs == NULL)
+ return FALSE;
+
+ return TRUE;
}
@@ -404,7 +408,7 @@ pstip_update_texture(struct pstip_stage *pstip)
/**
* Create the texture map we'll use for stippling.
*/
-static void
+static boolean
pstip_create_texture(struct pstip_stage *pstip)
{
struct pipe_context *pipe = pstip->pipe;
@@ -421,7 +425,10 @@ pstip_create_texture(struct pstip_stage *pstip)
texTemp.cpp = 1;
pstip->texture = screen->texture_create(screen, &texTemp);
- assert(pstip->texture->refcount == 1);
+ if (pstip->texture == NULL)
+ return FALSE;
+
+ return TRUE;
}
@@ -430,7 +437,7 @@ pstip_create_texture(struct pstip_stage *pstip)
* By using a mipmapped texture, we don't have to generate a different
* texture image for each line size.
*/
-static void
+static boolean
pstip_create_sampler(struct pstip_stage *pstip)
{
struct pipe_sampler_state sampler;
@@ -448,6 +455,10 @@ pstip_create_sampler(struct pstip_stage *pstip)
sampler.max_lod = 0.0f;
pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+ if (pstip->sampler_cso == NULL)
+ return FALSE;
+
+ return TRUE;
}
@@ -455,13 +466,15 @@ pstip_create_sampler(struct pstip_stage *pstip)
- * When we're about to draw our first AA line in a batch, this function is
+ * When we're about to draw our first stippled triangle in a batch, this function is
* called to tell the driver to bind our modified fragment shader.
*/
-static void
+static boolean
bind_pstip_fragment_shader(struct pstip_stage *pstip)
{
- if (!pstip->fs->pstip_fs) {
- generate_pstip_fs(pstip);
- }
+ if (!pstip->fs->pstip_fs &&
+ !generate_pstip_fs(pstip))
+ return FALSE;
+
pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs);
+ return TRUE;
}
@@ -473,25 +486,6 @@ pstip_stage( struct draw_stage *stage )
}
-static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point(stage->next, header);
-}
-
-
-static void
-passthrough_line(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->line(stage->next, header);
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
@@ -505,7 +499,12 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
assert(stage->draw->rasterizer->poly_stipple_enable);
/* bind our fragprog */
- bind_pstip_fragment_shader(pstip);
+ if (!bind_pstip_fragment_shader(pstip)) {
+ stage->tri = draw_pipe_passthrough_tri;
+ stage->tri(stage, header);
+ return;
+ }
+
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -523,7 +522,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures);
- /* now really draw first line */
+ /* now really draw first triangle */
- stage->tri = passthrough_tri;
+ stage->tri = draw_pipe_passthrough_tri;
stage->tri(stage, header);
}
@@ -579,8 +578,8 @@ draw_pstip_stage(struct draw_context *draw)
pstip->stage.draw = draw;
pstip->stage.next = NULL;
- pstip->stage.point = passthrough_point;
- pstip->stage.line = passthrough_line;
+ pstip->stage.point = draw_pipe_passthrough_point;
+ pstip->stage.line = draw_pipe_passthrough_line;
pstip->stage.tri = pstip_first_tri;
pstip->stage.flush = pstip_flush;
pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter;
@@ -705,7 +704,7 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
* into the draw module's pipeline. This will not be used if the
- * hardware has native support for AA lines.
+ * hardware has native support for polygon stipple.
*/
-void
+boolean
draw_install_pstipple_stage(struct draw_context *draw,
struct pipe_context *pipe)
{
@@ -717,14 +716,19 @@ draw_install_pstipple_stage(struct draw_context *draw,
- * Create / install AA line drawing / prim stage
+ * Create / install polygon stipple drawing / prim stage
*/
pstip = draw_pstip_stage( draw );
- assert(pstip);
+ if (pstip == NULL)
+ goto fail;
+
draw->pipeline.pstipple = &pstip->stage;
pstip->pipe = pipe;
/* create special texture, sampler state */
- pstip_create_texture(pstip);
- pstip_create_sampler(pstip);
+ if (!pstip_create_texture(pstip))
+ goto fail;
+
+ if (!pstip_create_sampler(pstip))
+ goto fail;
/* save original driver functions */
pstip->driver_create_fs_state = pipe->create_fs_state;
@@ -743,4 +747,17 @@ draw_install_pstipple_stage(struct draw_context *draw,
pipe->bind_sampler_states = pstip_bind_sampler_states;
pipe->set_sampler_textures = pstip_set_sampler_textures;
pipe->set_polygon_stipple = pstip_set_polygon_stipple;
+
+ return TRUE;
+
+ fail:
+ /* draw->pipeline.pstipple already points at this stage once it has been
+ * created; clear it before destroying the stage so no dangling pointer
+ * is left behind.
+ */
+ if (pstip) {
+ draw->pipeline.pstipple = NULL;
+ pstip->stage.destroy( &pstip->stage );
+ }
+
+ return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 506f33512c8..9cf5840ccee 100644
--- a/src/gallium/auxiliary/draw/draw_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -39,7 +39,7 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
+#include "draw_pipe.h"
/** Subclass of draw_stage */
@@ -195,18 +195,6 @@ stipple_flush(struct draw_stage *stage, unsigned flags)
}
-static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
static void
@@ -228,9 +216,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.draw = draw;
stipple->stage.next = NULL;
- stipple->stage.point = passthrough_point;
+ stipple->stage.point = draw_pipe_passthrough_point;
stipple->stage.line = stipple_first_line;
- stipple->stage.tri = passthrough_tri;
+ stipple->stage.tri = draw_pipe_passthrough_tri;
stipple->stage.reset_stipple_counter = reset_stipple_counter;
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 3debaac2822..5910dccc43c 100644
--- a/src/gallium/auxiliary/draw/draw_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -31,8 +31,8 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
+#include "draw_vs.h"
+#include "draw_pipe.h"
struct twoside_stage {
struct draw_stage stage;
@@ -99,21 +99,6 @@ static void twoside_tri( struct draw_stage *stage,
}
-static void twoside_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* pass-through */
- stage->next->line( stage->next, header );
-}
-
-
-static void twoside_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* pass-through */
- stage->next->point( stage->next, header );
-}
-
static void twoside_first_tri( struct draw_stage *stage,
struct prim_header *header )
@@ -187,17 +172,29 @@ static void twoside_destroy( struct draw_stage *stage )
struct draw_stage *draw_twoside_stage( struct draw_context *draw )
{
struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
+ if (twoside == NULL)
+ goto fail;
- draw_alloc_temp_verts( &twoside->stage, 3 );
+ if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
+ goto fail;
twoside->stage.draw = draw;
twoside->stage.next = NULL;
- twoside->stage.point = twoside_point;
- twoside->stage.line = twoside_line;
+ twoside->stage.point = draw_pipe_passthrough_point;
+ twoside->stage.line = draw_pipe_passthrough_line;
twoside->stage.tri = twoside_first_tri;
twoside->stage.flush = twoside_flush;
twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
twoside->stage.destroy = twoside_destroy;
return &twoside->stage;
+
+ fail:
+ /* Every jump to this label happens before stage.destroy is assigned,
+ * so tear the stage down by calling twoside_destroy directly.
+ */
+ if (twoside)
+ twoside_destroy( &twoside->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index b07860cd9ea..eeb2bc43f91 100644
--- a/src/gallium/auxiliary/draw/draw_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -36,6 +36,7 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "draw_private.h"
+#include "draw_pipe.h"
struct unfilled_stage {
@@ -147,19 +148,6 @@ static void unfilled_first_tri( struct draw_stage *stage,
}
-static void unfilled_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void unfilled_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
static void unfilled_flush( struct draw_stage *stage,
unsigned flags )
@@ -189,18 +177,30 @@ static void unfilled_destroy( struct draw_stage *stage )
struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
{
struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage);
+ if (unfilled == NULL)
+ goto fail;
- draw_alloc_temp_verts( &unfilled->stage, 0 );
+ if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
+ goto fail;
unfilled->stage.draw = draw;
unfilled->stage.next = NULL;
unfilled->stage.tmp = NULL;
- unfilled->stage.point = unfilled_point;
- unfilled->stage.line = unfilled_line;
+ unfilled->stage.point = draw_pipe_passthrough_point;
+ unfilled->stage.line = draw_pipe_passthrough_line;
unfilled->stage.tri = unfilled_first_tri;
unfilled->stage.flush = unfilled_flush;
unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
unfilled->stage.destroy = unfilled_destroy;
return &unfilled->stage;
+
+ fail:
+ /* Every jump to this label happens before stage.destroy is assigned,
+ * so tear the stage down by calling unfilled_destroy directly.
+ */
+ if (unfilled)
+ unfilled_destroy( &unfilled->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c
new file mode 100644
index 00000000000..04438f4dd08
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pipe_util.c
@@ -0,0 +1,163 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pipe.h"
+
+
+
+/**
+ * Pass-through handlers: hand the primitive to the next stage unchanged.
+ * Stages install these for the primitive types they do not modify.
+ */
+void
+draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->point(stage->next, header);
+}
+
+void
+draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->line(stage->next, header);
+}
+
+void
+draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->tri(stage->next, header);
+}
+
+
+
+
+
+/* This is only used for temporary verts.
+ */
+#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
+
+
+/**
+ * Allocate space for temporary post-transform vertices, such as for clipping.
+ *
+ * The nr vertices share a single backing allocation; stage->tmp[i] points
+ * at slot i, each slot being MAX_VERTEX_SIZE bytes.
+ *
+ * Returns TRUE on success (nr == 0 succeeds without allocating) and FALSE
+ * if an allocation fails, in which case stage->tmp is NULL and
+ * stage->nr_tmps is 0.
+ */
+boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
+{
+ assert(!stage->tmp);
+
+ stage->tmp = NULL;
+ stage->nr_tmps = 0;
+
+ if (nr != 0)
+ {
+ unsigned i;
+ ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
+
+ if (store == NULL)
+ return FALSE;
+
+ stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
+ if (stage->tmp == NULL) {
+ FREE(store);
+ return FALSE;
+ }
+
+ for (i = 0; i < nr; i++)
+ stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
+ }
+
+ /* Publish the count only once tmp[] is fully populated, so that
+ * nr_tmps != 0 always implies a valid tmp array.
+ */
+ stage->nr_tmps = nr;
+
+ return TRUE;
+}
+
+
+/**
+ * Free the temporary vertices allocated by draw_alloc_temp_verts().
+ * Does nothing beyond resetting the count when stage->tmp is NULL.
+ */
+void draw_free_temp_verts( struct draw_stage *stage )
+{
+ if (stage->tmp) {
+ /* tmp[0] is the start of the single backing store. */
+ FREE( stage->tmp[0] );
+ FREE( stage->tmp );
+ stage->tmp = NULL;
+ }
+
+ /* Keep the count in step with tmp so draw_reset_vertex_ids() never
+ * walks a freed array.
+ */
+ stage->nr_tmps = 0;
+}
+
+
+/* Reset vertex ids. This is basically a type of flush.
+ *
+ * Called only from draw_pipe_vbuf.c
+ */
+void draw_reset_vertex_ids(struct draw_context *draw)
+{
+ struct draw_stage *stage = draw->pipeline.first;
+
+ while (stage) {
+ unsigned i;
+
+ for (i = 0; i < stage->nr_tmps; i++)
+ stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
+
+ stage = stage->next;
+ }
+
+ if (draw->pipeline.verts)
+ {
+ unsigned i;
+ char *verts = draw->pipeline.verts;
+ unsigned stride = draw->pipeline.vertex_stride;
+
+ for (i = 0; i < draw->pipeline.vertex_count; i++) {
+ ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID;
+ verts += stride;
+ }
+ }
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index e163e078f0f..6be1d369c33 100644
--- a/src/gallium/auxiliary/draw/draw_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -31,6 +31,8 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "draw_private.h"
+#include "draw_pipe.h"
+#include "draw_context.h"
static boolean points( unsigned prim )
{
@@ -56,7 +58,8 @@ static boolean triangles( unsigned prim )
* pipeline stages.
*/
boolean
-draw_need_pipeline(const struct draw_context *draw,
+draw_need_pipeline(const struct draw_context *draw,
+ const struct pipe_rasterizer_state *rasterizer,
unsigned int prim )
{
/* Don't have to worry about triangles turning into lines/points
@@ -66,30 +69,30 @@ draw_need_pipeline(const struct draw_context *draw,
if (lines(prim))
{
/* line stipple */
- if (draw->rasterizer->line_stipple_enable && draw->line_stipple)
+ if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple)
return TRUE;
/* wide lines */
- if (draw->rasterizer->line_width > draw->wide_line_threshold)
+ if (rasterizer->line_width > draw->pipeline.wide_line_threshold)
return TRUE;
/* AA lines */
- if (draw->rasterizer->line_smooth && draw->pipeline.aaline)
+ if (rasterizer->line_smooth && draw->pipeline.aaline)
return TRUE;
}
if (points(prim))
{
/* large points */
- if (draw->rasterizer->point_size > draw->wide_point_threshold)
+ if (rasterizer->point_size > draw->pipeline.wide_point_threshold)
return TRUE;
/* AA points */
- if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
+ if (rasterizer->point_smooth && draw->pipeline.aapoint)
return TRUE;
/* point sprites */
- if (draw->rasterizer->point_sprite && draw->point_sprite)
+ if (rasterizer->point_sprite && draw->pipeline.point_sprite)
return TRUE;
}
@@ -97,20 +100,20 @@ draw_need_pipeline(const struct draw_context *draw,
if (triangles(prim))
{
/* polygon stipple */
- if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple)
+ if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple)
return TRUE;
/* unfilled polygons */
- if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
return TRUE;
/* polygon offset */
- if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw)
+ if (rasterizer->offset_cw || rasterizer->offset_ccw)
return TRUE;
/* two-side lighting */
- if (draw->rasterizer->light_twoside)
+ if (rasterizer->light_twoside)
return TRUE;
}
@@ -119,7 +122,7 @@ draw_need_pipeline(const struct draw_context *draw,
*
* Generally this isn't a reason to require the pipeline, though.
*
- if (draw->rasterizer->cull_mode)
+ if (rasterizer->cull_mode)
return TRUE;
*/
@@ -145,15 +148,15 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
stage->next = next;
/* drawing wide lines? */
- wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold
+ wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold
&& !draw->rasterizer->line_smooth);
/* drawing large points? */
- if (draw->rasterizer->point_sprite && draw->point_sprite)
+ if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint)
wide_points = FALSE;
- else if (draw->rasterizer->point_size > draw->wide_point_threshold)
+ else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold)
wide_points = TRUE;
else
wide_points = FALSE;
@@ -186,7 +189,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.wide_point;
}
- if (draw->rasterizer->line_stipple_enable && draw->line_stipple) {
+ if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) {
draw->pipeline.stipple->next = next;
next = draw->pipeline.stipple;
precalc_flat = 1; /* only needed for lines really */
@@ -238,7 +241,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
/* Clip stage
*/
- if (!draw->rasterizer->bypass_clipping)
+ if (!draw->bypass_clipping)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
@@ -298,6 +301,8 @@ static void validate_destroy( struct draw_stage *stage )
struct draw_stage *draw_validate_stage( struct draw_context *draw )
{
struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
+ if (stage == NULL)
+ return NULL;
stage->draw = draw;
stage->next = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index e3216ff711d..afd5f5544d5 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -40,7 +40,8 @@
#include "draw_vbuf.h"
#include "draw_private.h"
#include "draw_vertex.h"
-#include "draw_vf.h"
+#include "draw_pipe.h"
+#include "translate/translate.h"
/**
@@ -56,7 +57,7 @@ struct vbuf_stage {
/** Vertex size in bytes */
unsigned vertex_size;
- struct draw_vertex_fetch *vf;
+ struct translate *translate;
/* FIXME: we have no guarantee that 'unsigned' is 32bit */
@@ -71,8 +72,9 @@ struct vbuf_stage {
unsigned max_indices;
unsigned nr_indices;
- /** Pipe primitive */
- unsigned prim;
+ /* Cache point size somewhere its address won't change:
+ */
+ float point_size;
};
@@ -113,61 +115,6 @@ check_space( struct vbuf_stage *vbuf, unsigned nr )
}
-#if 0
-static INLINE void
-dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
-{
- assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
- unsigned i, j, k;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- debug_printf("EMIT_OMIT:");
- break;
- case EMIT_1F:
- debug_printf("EMIT_1F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- debug_printf("EMIT_1F_PSIZE:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_2F:
- debug_printf("EMIT_2F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_3F:
- debug_printf("EMIT_3F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- data += sizeof(float);
- break;
- case EMIT_4F:
- debug_printf("EMIT_4F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_4UB:
- debug_printf("EMIT_4UB:\t");
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- break;
- default:
- assert(0);
- }
- debug_printf("\n");
- }
- debug_printf("\n");
-}
-#endif
/**
@@ -177,96 +124,25 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
* have a couple of slots at the beginning (1-dword header, 4-dword
* clip pos) that we ignore here. We only use the vertex->data[] fields.
*/
-static INLINE void
+static INLINE ushort
emit_vertex( struct vbuf_stage *vbuf,
struct vertex_header *vertex )
{
-#if 0
- debug_printf("emit vertex %d to %p\n",
- vbuf->nr_vertices, vbuf->vertex_ptr);
-#endif
-
- if(vertex->vertex_id != UNDEFINED_VERTEX_ID) {
- if(vertex->vertex_id < vbuf->nr_vertices)
- return;
- else
- debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n",
- vertex->vertex_id, vbuf->nr_vertices);
- return;
- }
-
- vertex->vertex_id = vbuf->nr_vertices++;
-
- if(!vbuf->vf) {
- const struct vertex_info *vinfo = vbuf->vinfo;
- uint i;
- uint count = 0; /* for debug/sanity */
+ if(vertex->vertex_id == UNDEFINED_VERTEX_ID) {
+ /* Hmm - vertices are emitted one at a time - better make sure
+ * set_buffer is efficient. Consider a special one-shot mode for
+ * translate.
+ */
+ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
+ vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
+
+ if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
- assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- uint j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- /* no-op */
- break;
- case EMIT_1F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- count++;
- break;
- case EMIT_1F_PSIZE:
- *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size);
- count++;
- break;
- case EMIT_2F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- count += 2;
- break;
- case EMIT_3F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- count += 3;
- break;
- case EMIT_4F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][3]);
- count += 4;
- break;
- case EMIT_4UB:
- *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ),
- float_to_ubyte( vertex->data[j][1] ),
- float_to_ubyte( vertex->data[j][0] ),
- float_to_ubyte( vertex->data[j][3] ));
- count += 1;
- break;
- default:
- assert(0);
- }
- }
- assert(count == vinfo->size);
-#if 0
- {
- static float data[256];
- draw_vf_emit_vertex(vbuf->vf, vertex, data);
- if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) {
- debug_printf("With VF:\n");
- dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data);
- debug_printf("Without VF:\n");
- dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size);
- assert(0);
- }
- }
-#endif
- }
- else {
- draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr);
-
vbuf->vertex_ptr += vbuf->vertex_size/4;
+ vertex->vertex_id = vbuf->nr_vertices++;
}
+
+ return vertex->vertex_id;
}
@@ -280,9 +156,7 @@ vbuf_tri( struct draw_stage *stage,
check_space( vbuf, 3 );
for (i = 0; i < 3; i++) {
- emit_vertex( vbuf, prim->v[i] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
}
}
@@ -297,9 +171,7 @@ vbuf_line( struct draw_stage *stage,
check_space( vbuf, 2 );
for (i = 0; i < 2; i++) {
- emit_vertex( vbuf, prim->v[i] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
}
}
@@ -312,43 +184,113 @@ vbuf_point( struct draw_stage *stage,
check_space( vbuf, 1 );
- emit_vertex( vbuf, prim->v[0] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id;
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
}
+
+
/**
* Set the prim type for subsequent vertices.
* This may result in a new vertex size. The existing vbuffer (if any)
* will be flushed if needed and a new one allocated.
*/
static void
-vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim )
+vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
{
- const struct vertex_info *vinfo;
- unsigned vertex_size;
-
- assert(newprim == PIPE_PRIM_POINTS ||
- newprim == PIPE_PRIM_LINES ||
- newprim == PIPE_PRIM_TRIANGLES);
+ struct translate_key hw_key;
+ unsigned dst_offset;
+ unsigned i;
- vbuf->prim = newprim;
- vbuf->render->set_primitive(vbuf->render, newprim);
+ vbuf->render->set_primitive(vbuf->render, prim);
- vinfo = vbuf->render->get_vertex_info(vbuf->render);
- vertex_size = vinfo->size * sizeof(float);
+ /* Must do this after set_primitive() above:
+ *
+ * XXX: need some state management to track when this needs to be
+ * recalculated. The driver should tell us whether there was a
+ * state change.
+ */
+ vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
- if (vertex_size != vbuf->vertex_size)
+ if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) {
vbuf_flush_vertices(vbuf);
+ vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
+ }
- vbuf->vinfo = vinfo;
- vbuf->vertex_size = vertex_size;
- if(vbuf->vf)
- draw_vf_set_vertex_info(vbuf->vf,
- vbuf->vinfo,
- vbuf->stage.draw->rasterizer->point_size);
-
+ /* Translate from pipeline vertices to hw vertices.
+ */
+ dst_offset = 0;
+ memset(&hw_key, 0, sizeof(hw_key));
+
+ for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned src_buffer = 0;
+ unsigned output_format;
+ unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) );
+
+ switch (vbuf->vinfo->emit[i]) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ src_buffer = 1;
+ src_offset = 0;
+ break;
+ case EMIT_4UB:
+ output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ emit_sz = 4 * sizeof(ubyte);
+ break;
+ default:
+ assert(0);
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ break;
+ }
+
+ hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ hw_key.element[i].input_buffer = src_buffer;
+ hw_key.element[i].input_offset = src_offset;
+ hw_key.element[i].output_format = output_format;
+ hw_key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ }
+
+ hw_key.nr_elements = vbuf->vinfo->num_attribs;
+ hw_key.output_stride = vbuf->vinfo->size * 4;
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!vbuf->translate ||
+ memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0)
+ {
+ if (vbuf->translate)
+ vbuf->translate->release(vbuf->translate);
+
+ vbuf->translate = translate_create( &hw_key );
+
+ vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0);
+ }
+
+ vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
+
+ /* Allocate new buffer?
+ */
if (!vbuf->vertices)
vbuf_alloc_vertices(vbuf);
}
@@ -402,29 +343,9 @@ vbuf_flush_indices( struct vbuf_stage *vbuf )
assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
- switch(vbuf->prim) {
- case PIPE_PRIM_POINTS:
- break;
- case PIPE_PRIM_LINES:
- assert(vbuf->nr_indices % 2 == 0);
- break;
- case PIPE_PRIM_TRIANGLES:
- assert(vbuf->nr_indices % 3 == 0);
- break;
- default:
- assert(0);
- }
-
vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
vbuf->nr_indices = 0;
-
- /* don't need to reset point/line/tri functions */
-#if 0
- stage->point = vbuf_first_point;
- stage->line = vbuf_first_line;
- stage->tri = vbuf_first_tri;
-#endif
}
@@ -466,8 +387,8 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
- (ushort) vbuf->vertex_size,
- (ushort) vbuf->max_vertices);
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
vbuf->vertex_ptr = vbuf->vertices;
}
@@ -505,8 +426,8 @@ static void vbuf_destroy( struct draw_stage *stage )
if(vbuf->indices)
align_free( vbuf->indices );
- if(vbuf->vf)
- draw_vf_destroy( vbuf->vf );
+ if(vbuf->translate)
+ vbuf->translate->release( vbuf->translate );
if (vbuf->render)
vbuf->render->destroy( vbuf->render );
@@ -522,9 +443,8 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
struct vbuf_render *render )
{
struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
-
- if(!vbuf)
- return NULL;
+ if (vbuf == NULL)
+ goto fail;
vbuf->stage.draw = draw;
vbuf->stage.point = vbuf_first_point;
@@ -535,21 +455,22 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
vbuf->stage.destroy = vbuf_destroy;
vbuf->render = render;
+ vbuf->max_indices = MIN2(render->max_indices, UNDEFINED_VERTEX_ID-1);
- assert(render->max_indices < UNDEFINED_VERTEX_ID);
- vbuf->max_indices = render->max_indices;
- vbuf->indices = (ushort *)
- align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 );
- if(!vbuf->indices)
- vbuf_destroy(&vbuf->stage);
+ vbuf->indices = (ushort *) align_malloc( vbuf->max_indices *
+ sizeof(vbuf->indices[0]),
+ 16 );
+ if (!vbuf->indices)
+ goto fail;
vbuf->vertices = NULL;
vbuf->vertex_ptr = vbuf->vertices;
-
- vbuf->prim = ~0;
-
- if(!GETENV("GALLIUM_NOVF"))
- vbuf->vf = draw_vf_create();
return &vbuf->stage;
+
+ fail:
+ if (vbuf)
+ vbuf_destroy(&vbuf->stage);
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 9a168ce8bdc..452732e662c 100644
--- a/src/gallium/auxiliary/draw/draw_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -32,6 +32,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
+#include "draw_pipe.h"
struct wideline_stage {
@@ -48,19 +49,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
}
-static void wideline_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void wideline_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
-
/**
* Draw a wide line by drawing a quad (two triangles).
@@ -179,9 +167,9 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
wide->stage.draw = draw;
wide->stage.next = NULL;
- wide->stage.point = wideline_point;
+ wide->stage.point = draw_pipe_passthrough_point;
wide->stage.line = wideline_line;
- wide->stage.tri = wideline_tri;
+ wide->stage.tri = draw_pipe_passthrough_tri;
wide->stage.flush = wideline_flush;
wide->stage.reset_stipple_counter = wideline_reset_stipple_counter;
wide->stage.destroy = wideline_destroy;
diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 6fc7c9fcd7a..ed08573382d 100644
--- a/src/gallium/auxiliary/draw/draw_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -31,7 +31,8 @@
#include "pipe/p_util.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
+#include "draw_vs.h"
+#include "draw_pipe.h"
struct widepoint_stage {
@@ -60,23 +61,6 @@ widepoint_stage( struct draw_stage *stage )
}
-static void passthrough_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-static void widepoint_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line(stage->next, header);
-}
-
-static void widepoint_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
/**
@@ -199,16 +183,16 @@ static void widepoint_first_point( struct draw_stage *stage,
wide->ybias = 0.0;
if (draw->rasterizer->gl_rasterization_rules) {
- wide->ybias = -0.125;
+ wide->xbias = 0.125;
}
/* XXX we won't know the real size if it's computed by the vertex shader! */
- if ((draw->rasterizer->point_size > draw->wide_point_threshold) ||
- (draw->rasterizer->point_sprite && draw->point_sprite)) {
+ if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) ||
+ (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) {
stage->point = widepoint_point;
}
else {
- stage->point = passthrough_point;
+ stage->point = draw_pipe_passthrough_point;
}
if (draw->rasterizer->point_sprite) {
@@ -265,17 +249,26 @@ static void widepoint_destroy( struct draw_stage *stage )
struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
{
struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
+ if (wide == NULL)
+ goto fail;
- draw_alloc_temp_verts( &wide->stage, 4 );
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
wide->stage.draw = draw;
wide->stage.next = NULL;
wide->stage.point = widepoint_first_point;
- wide->stage.line = widepoint_line;
- wide->stage.tri = widepoint_tri;
+ wide->stage.line = draw_pipe_passthrough_line;
+ wide->stage.tri = draw_pipe_passthrough_tri;
wide->stage.flush = widepoint_flush;
wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter;
wide->stage.destroy = widepoint_destroy;
return &wide->stage;
+
+ fail:
+ if (wide)
+ widepoint_destroy( &wide->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c
deleted file mode 100644
index 51b69503344..00000000000
--- a/src/gallium/auxiliary/draw/draw_prim.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-
-#define RP_NONE 0
-#define RP_POINT 1
-#define RP_LINE 2
-#define RP_TRI 3
-
-
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- RP_POINT,
- RP_LINE,
- RP_LINE,
- RP_LINE,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI
-};
-
-
-static void draw_prim_queue_flush( struct draw_context *draw )
-{
- unsigned i;
-
- if (0)
- debug_printf("Flushing with %d prims, %d verts\n",
- draw->pq.queue_nr, draw->vs.queue_nr);
-
- assert (draw->pq.queue_nr != 0);
-
- /* NOTE: we cannot save draw->pipeline->first in a local var because
- * draw->pipeline->first is often changed by the first call to tri(),
- * line(), etc.
- */
- if (draw->rasterizer->line_stipple_enable) {
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
- }
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
- }
- break;
- case RP_POINT:
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- }
- }
- else {
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- case RP_POINT:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- }
- }
-
- draw->pq.queue_nr = 0;
- draw->vs.post_nr = 0;
- draw_vertex_cache_unreference( draw );
-}
-
-void draw_do_flush( struct draw_context *draw, unsigned flags )
-{
- if (0)
- debug_printf("Flushing with %d verts, %d prims\n",
- draw->vs.queue_nr,
- draw->pq.queue_nr );
-
- if (draw->flushing)
- return;
-
- draw->flushing = TRUE;
-
- if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
- if (draw->vs.queue_nr) {
- (*draw->shader_queue_flush)(draw);
- }
-
- if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
- if (draw->pq.queue_nr)
- draw_prim_queue_flush(draw);
-
- if (flags >= DRAW_FLUSH_VERTEX_CACHE) {
- draw_vertex_cache_invalidate(draw);
-
- if (flags >= DRAW_FLUSH_STATE_CHANGE) {
- draw->pipeline.first->flush( draw->pipeline.first, flags );
- draw->pipeline.first = draw->pipeline.validate;
- draw->reduced_prim = ~0;
- }
- }
- }
- }
-
- draw->flushing = FALSE;
-}
-
-
-
-/* Return a pointer to a freshly queued primitive header. Ensure that
- * there is room in the vertex cache for a maximum of "nr_verts" new
- * vertices. Flush primitive and/or vertex queues if necessary to
- * make space.
- */
-static struct prim_header *get_queued_prim( struct draw_context *draw,
- unsigned nr_verts )
-{
- if (!draw_vertex_cache_check_space( draw, nr_verts )) {
-// debug_printf("v");
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE );
- }
- else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) {
-// debug_printf("p");
- draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE );
- }
-
- assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH);
-
- return &draw->pq.queue[draw->pq.queue_nr++];
-}
-
-
-
-/**
- * Add a point to the primitive queue.
- * \param i0 index into user's vertex arrays
- */
-static void do_point( struct draw_context *draw,
- unsigned i0 )
-{
- struct prim_header *prim = get_queued_prim( draw, 1 );
-
- prim->reset_line_stipple = 0;
- prim->edgeflags = 1;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
-}
-
-
-/**
- * Add a line to the primitive queue.
- * \param i0 index into user's vertex arrays
- * \param i1 index into user's vertex arrays
- */
-static void do_line( struct draw_context *draw,
- boolean reset_stipple,
- unsigned i0,
- unsigned i1 )
-{
- struct prim_header *prim = get_queued_prim( draw, 2 );
-
- prim->reset_line_stipple = reset_stipple;
- prim->edgeflags = 1;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
- prim->v[1] = draw->vcache.get_vertex( draw, i1 );
-}
-
-/**
- * Add a triangle to the primitive queue.
- */
-static void do_triangle( struct draw_context *draw,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- struct prim_header *prim = get_queued_prim( draw, 3 );
-
-// _mesa_printf("tri %d %d %d\n", i0, i1, i2);
- prim->reset_line_stipple = 1;
- prim->edgeflags = ~0;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
- prim->v[1] = draw->vcache.get_vertex( draw, i1 );
- prim->v[2] = draw->vcache.get_vertex( draw, i2 );
-}
-
-static void do_ef_triangle( struct draw_context *draw,
- boolean reset_stipple,
- unsigned ef_mask,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- struct prim_header *prim = get_queued_prim( draw, 3 );
- struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 );
- struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 );
- struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 );
-
- prim->reset_line_stipple = reset_stipple;
-
- prim->edgeflags = ef_mask & ((v0->edgeflag << 0) |
- (v1->edgeflag << 1) |
- (v2->edgeflag << 2));
- prim->pad = 0;
- prim->v[0] = v0;
- prim->v[1] = v1;
- prim->v[2] = v2;
-}
-
-
-static void do_ef_quad( struct draw_context *draw,
- unsigned v0,
- unsigned v1,
- unsigned v2,
- unsigned v3 )
-{
- const unsigned omitEdge2 = ~(1 << 1);
- const unsigned omitEdge3 = ~(1 << 2);
- do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 );
- do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 );
-}
-
-static void do_quad( struct draw_context *draw,
- unsigned v0,
- unsigned v1,
- unsigned v2,
- unsigned v3 )
-{
- do_triangle( draw, v0, v1, v3 );
- do_triangle( draw, v1, v2, v3 );
-}
-
-
-/**
- * Main entrypoint to draw some number of points/lines/triangles
- */
-static void
-draw_prim( struct draw_context *draw,
- unsigned prim, unsigned start, unsigned count )
-{
- unsigned i;
- boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL);
- boolean flatfirst =
- (draw->rasterizer->flatshade & draw->rasterizer->flatshade_first) ? TRUE : FALSE;
-
-// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- do_point( draw,
- start + i );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- do_line( draw,
- TRUE,
- start + i + 0,
- start + i + 1);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1, /* XXX: only if vb not split */
- start + i - 1,
- start + i );
- }
-
- do_line( draw,
- 0,
- start + count - 1,
- start + 0 );
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1,
- start + i - 1,
- start + i );
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- if (unfilled) {
- for (i = 0; i+2 < count; i += 3) {
- do_ef_triangle( draw,
- 1,
- ~0,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- else {
- for (i = 0; i+2 < count; i += 3) {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- do_triangle( draw,
- start + i + 0,
- start + i + 2,
- start + i + 1 );
- }
- else {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- do_triangle( draw,
- start + i + 1,
- start + i + 0,
- start + i + 2 );
- }
- else {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + i + 1,
- start + i + 2,
- start + 0 );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 4) {
- do_ef_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
- }
- }
- else {
- for (i = 0; i+3 < count; i += 4) {
- do_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
- }
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 2) {
- do_ef_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
- }
- }
- else {
- for (i = 0; i+3 < count; i += 2) {
- do_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- if (unfilled) {
- unsigned ef_mask = (1<<2) | (1<<0);
-
- for (i = 0; i+2 < count; i++) {
-
- if (i + 3 >= count)
- ef_mask |= (1<<1);
-
- do_ef_triangle( draw,
- i == 0,
- ef_mask,
- start + i + 1,
- start + i + 2,
- start + 0);
-
- ef_mask &= ~(1<<2);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + i + 1,
- start + i + 2,
- start + 0);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-
-
-
-/**
- * Draw vertex arrays
- * This is the main entrypoint into the drawing module.
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
- */
-void
-draw_arrays(struct draw_context *draw, unsigned prim,
- unsigned start, unsigned count)
-{
- if (reduced_prim[prim] != draw->reduced_prim) {
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->reduced_prim = reduced_prim[prim];
- }
-
- /* drawing done here: */
- if (!draw_pt_arrays(draw, prim, start, count)) {
- /* we have to run the whole pipeline */
- draw_prim(draw, prim, start, count);
- }
-}
-
-
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index c8cb96c8ba0..39aa81b43cf 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -44,7 +44,6 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "rtasm/rtasm_x86sse.h"
#include "tgsi/exec/tgsi_exec.h"
#include "tgsi/util/tgsi_scan.h"
@@ -52,11 +51,11 @@
struct pipe_context;
struct gallivm_prog;
struct gallivm_cpu_engine;
+struct draw_vertex_shader;
+struct draw_context;
+struct draw_stage;
+struct vbuf_render;
-struct draw_pt_middle_end;
-struct draw_pt_front_end;
-
-#define MAX_SHADER_VERTICES 128
/**
* Basic vertex info.
@@ -70,17 +69,14 @@ struct vertex_header {
float clip[4];
- float data[][4]; /* Note variable size */
+ /* This will probably become float (*data)[4] soon:
+ */
+ float data[][4];
};
/* NOTE: It should match vertex_id size above */
#define UNDEFINED_VERTEX_ID 0xffff
-/* XXX This is too large */
-#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
-#define MAX_VERTEX_ALLOCATION ((MAX_VERTEX_SIZE + 0x0f) & ~0x0f)
-
-
/**
* Basic info for a point/line/triangle primitive.
@@ -95,92 +91,11 @@ struct prim_header {
-struct draw_context;
-
-/**
- * Base class for all primitive drawing stages.
- */
-struct draw_stage
-{
- struct draw_context *draw; /**< parent context */
-
- struct draw_stage *next; /**< next stage in pipeline */
-
- struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
- unsigned nr_tmps;
-
- void (*point)( struct draw_stage *,
- struct prim_header * );
-
- void (*line)( struct draw_stage *,
- struct prim_header * );
-
- void (*tri)( struct draw_stage *,
- struct prim_header * );
-
- void (*flush)( struct draw_stage *,
- unsigned flags );
-
- void (*reset_stipple_counter)( struct draw_stage * );
- void (*destroy)( struct draw_stage * );
-};
-
-
-#define PRIM_QUEUE_LENGTH 32
-#define VCACHE_SIZE 32
-#define VCACHE_OVERFLOW 4
-#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */
-
-/**
- * Private version of the compiled vertex_shader
- */
-struct draw_vertex_shader {
-
- /* This member will disappear shortly:
- */
- struct pipe_shader_state state;
-
- struct tgsi_shader_info info;
-
- void (*prepare)( struct draw_vertex_shader *shader,
- struct draw_context *draw );
-
- /* Run the shader - this interface will get cleaned up in the
- * future:
- */
- boolean (*run)( struct draw_vertex_shader *shader,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- void *out,
- unsigned vertex_size);
-
-
- void (*delete)( struct draw_vertex_shader * );
-};
-
-
-/* Internal function for vertex fetch.
- */
-typedef void (*fetch_func)(const void *ptr, float *attrib);
-
-fetch_func draw_get_fetch_func( enum pipe_format format );
-
-
-
-typedef void (*full_fetch_func)( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count );
-
-typedef void (*pt_fetch_func)( struct draw_context *draw,
- float *out,
- unsigned start,
- unsigned count );
-
-
-struct vbuf_render;
+#define PT_SHADE 0x1
+#define PT_CLIPTEST 0x2
+#define PT_PIPELINE 0x4
+#define PT_MAX_MIDDLE 0x8
/**
* Private context for the drawing module.
@@ -207,6 +122,17 @@ struct draw_context
struct draw_stage *wide_line;
struct draw_stage *wide_point;
struct draw_stage *rasterize;
+
+ float wide_point_threshold; /**< convert pnts to tris if larger than this */
+ float wide_line_threshold; /**< convert lines to tris if wider than this */
+ boolean line_stipple; /**< do line stipple? */
+ boolean point_sprite; /**< convert points to quads for sprites? */
+
+ /* Temporary storage while the pipeline is being run:
+ */
+ char *verts;
+ unsigned vertex_stride;
+ unsigned vertex_count;
} pipeline;
@@ -215,71 +141,63 @@ struct draw_context
/* Support prototype passthrough path:
*/
struct {
- unsigned prim; /* XXX: to be removed */
- unsigned hw_vertex_size; /* XXX: to be removed */
-
struct {
struct draw_pt_middle_end *fetch_emit;
- struct draw_pt_middle_end *fetch_pipeline;
- struct draw_pt_middle_end *fetch_shade_emit;
- struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit;
+ struct draw_pt_middle_end *general;
} middle;
struct {
- struct draw_pt_front_end *noop;
- struct draw_pt_front_end *split_arrays;
struct draw_pt_front_end *vcache;
} front;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_buffers;
+
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_elements;
+
+ /* user-space vertex data, buffers */
struct {
- char *verts;
- unsigned vertex_stride;
- unsigned vertex_count;
- } pipeline;
+ const unsigned *edgeflag;
+
+ /** vertex element/index buffer (ex: glDrawElements) */
+ const void *elts;
+ /** bytes per index (0, 1, 2 or 4) */
+ unsigned eltSize;
+
+ /** vertex arrays */
+ const void *vbuffer[PIPE_MAX_ATTRIBS];
+
+ /** constant buffer (for vertex shader) */
+ const void *constants;
+ } user;
} pt;
+ struct {
+ boolean bypass_clipping;
+ } driver;
+
boolean flushing;
+ boolean vcache_flushing;
+ boolean bypass_clipping; /* set if either api or driver bypass_clipping true */
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+
struct draw_vertex_shader *vertex_shader;
boolean identity_viewport;
uint num_vs_outputs; /**< convenience, from vertex_shader */
- /* user-space vertex data, buffers */
- struct {
- const unsigned *edgeflag;
-
- /** vertex element/index buffer (ex: glDrawElements) */
- const void *elts;
- /** bytes per index (0, 1, 2 or 4) */
- unsigned eltSize;
-
- /** vertex arrays */
- const void *vbuffer[PIPE_MAX_ATTRIBS];
-
- /** constant buffer (for vertex shader) */
- const void *constants;
- } user;
/* Clip derived state:
*/
float plane[12][4];
unsigned nr_planes;
- float wide_point_threshold; /**< convert pnts to tris if larger than this */
- float wide_line_threshold; /**< convert lines to tris if wider than this */
- boolean line_stipple; /**< do line stipple? */
- boolean point_sprite; /**< convert points to quads for sprites? */
- boolean use_sse;
- boolean use_pt_shaders; /* temporary flag to switch on pt shader paths */
-
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
struct {
@@ -293,59 +211,6 @@ struct draw_context
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
- /* Vertex fetch internal state
- */
- struct {
- const ubyte *src_ptr[PIPE_MAX_ATTRIBS];
- unsigned pitch[PIPE_MAX_ATTRIBS];
- fetch_func fetch[PIPE_MAX_ATTRIBS];
- unsigned nr_attrs;
- full_fetch_func fetch_func;
- pt_fetch_func pt_fetch;
- } vertex_fetch;
-
- /* Post-tnl vertex cache:
- */
- struct {
- unsigned referenced; /**< bitfield */
-
- struct {
- unsigned in; /* client array element */
- unsigned out; /* index in vs queue/array */
- } idx[VCACHE_SIZE + VCACHE_OVERFLOW];
-
- unsigned overflow;
-
- /** To find space in the vertex cache: */
- struct vertex_header *(*get_vertex)( struct draw_context *draw,
- unsigned i );
- } vcache;
-
- /* Vertex shader queue:
- */
- struct {
- unsigned elts[VS_QUEUE_LENGTH]; /**< index into the user's vertex arrays */
- char *vertex_cache;
- unsigned queue_nr;
- unsigned post_nr;
- } vs;
-
- /**
- * Run the vertex shader on all vertices in the vertex queue.
- */
- void (*shader_queue_flush)(struct draw_context *draw);
-
- /* Prim pipeline queue:
- */
- struct {
- /* Need to queue up primitives until their vertices have been
- * transformed by a vs queue flush.
- */
- struct prim_header queue[PRIM_QUEUE_LENGTH];
- unsigned queue_nr;
- } pq;
-
-
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
@@ -354,107 +219,49 @@ struct draw_context
-extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
-extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
-extern struct draw_stage *draw_offset_stage( struct draw_context *context );
-extern struct draw_stage *draw_clip_stage( struct draw_context *context );
-extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
-extern struct draw_stage *draw_cull_stage( struct draw_context *context );
-extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
-extern struct draw_stage *draw_wide_line_stage( struct draw_context *context );
-extern struct draw_stage *draw_wide_point_stage( struct draw_context *context );
-extern struct draw_stage *draw_validate_stage( struct draw_context *context );
-extern void draw_free_temp_verts( struct draw_stage *stage );
-extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
-extern void draw_reset_vertex_ids( struct draw_context *draw );
+/*******************************************************************************
+ * Vertex processing (was passthrough) code:
+ */
+boolean draw_pt_init( struct draw_context *draw );
+void draw_pt_destroy( struct draw_context *draw );
+void draw_pt_reset_vertex_ids( struct draw_context *draw );
-extern int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts );
+/*******************************************************************************
+ * Primitive processing (pipeline) code:
+ */
-extern void draw_vertex_cache_invalidate( struct draw_context *draw );
-extern void draw_vertex_cache_unreference( struct draw_context *draw );
-extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw );
+boolean draw_pipeline_init( struct draw_context *draw );
+void draw_pipeline_destroy( struct draw_context *draw );
-extern void draw_vertex_shader_queue_flush( struct draw_context *draw );
+void draw_pipeline_run( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count );
-extern void draw_update_vertex_fetch( struct draw_context *draw );
+void draw_pipeline_flush( struct draw_context *draw,
+ unsigned flags );
-extern boolean draw_need_pipeline(const struct draw_context *draw,
- unsigned prim );
-/* Passthrough mode (second attempt):
+/*******************************************************************************
+ * Flushing
*/
-boolean draw_pt_init( struct draw_context *draw );
-void draw_pt_destroy( struct draw_context *draw );
-boolean draw_pt_arrays( struct draw_context *draw,
- unsigned prim,
- unsigned start,
- unsigned count );
-void draw_pt_reset_vertex_ids( struct draw_context *draw );
-void draw_pt_run_pipeline( struct draw_context *draw,
- unsigned prim,
- char *verts,
- unsigned vertex_stride,
- unsigned vertex_count,
- const ushort *elts,
- unsigned count );
-
-
-#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
-#define DRAW_FLUSH_PRIM_QUEUE 0x2
-#define DRAW_FLUSH_VERTEX_CACHE 0x4
#define DRAW_FLUSH_STATE_CHANGE 0x8
#define DRAW_FLUSH_BACKEND 0x10
void draw_do_flush( struct draw_context *draw, unsigned flags );
-boolean draw_get_edgeflag( struct draw_context *draw,
- unsigned idx );
-/**
- * Get a writeable copy of a vertex.
- * \param stage drawing stage info
- * \param vert the vertex to copy (source)
- * \param idx index into stage's tmp[] array to put the copy (dest)
- * \return pointer to the copied vertex
- */
-static INLINE struct vertex_header *
-dup_vert( struct draw_stage *stage,
- const struct vertex_header *vert,
- unsigned idx )
-{
- struct vertex_header *tmp = stage->tmp[idx];
- const uint vsize = sizeof(struct vertex_header)
- + stage->draw->num_vs_outputs * 4 * sizeof(float);
- memcpy(tmp, vert, vsize);
- tmp->vertex_id = UNDEFINED_VERTEX_ID;
- return tmp;
-}
-
-static INLINE float
-dot4(const float *a, const float *b)
-{
- float result = (a[0]*b[0] +
- a[1]*b[1] +
- a[2]*b[2] +
- a[3]*b[3]);
-
- return result;
-}
-
-static INLINE struct vertex_header *
-draw_header_from_block(char *block, int size, int num)
-{
- return (struct vertex_header*)(block + num * size);
-}
#endif /* DRAW_PRIVATE_H */
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 3d2e7bf7b82..f5a3bf390e8 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -36,146 +36,53 @@
#include "draw/draw_pt.h"
-#if 0
-static boolean too_many_elts( struct draw_context *draw,
- unsigned elts )
-{
- return elts > (8 * 1024);
-}
-#endif
-
-static INLINE unsigned reduced_prim(unsigned prim)
-{
- /*FIXME*/
- return prim;
-}
-static INLINE boolean good_prim(unsigned prim)
-{
- /*FIXME*/
- return FALSE;
-}
-boolean
+/* Overall we split things into:
+ * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - middle -- fetch, shade, cliptest, viewport
+ * - pipeline -- the prim pipeline: clipping, wide lines, etc
+ * - backend -- the vbuf_render provided by the driver.
+ */
+static boolean
draw_pt_arrays(struct draw_context *draw,
unsigned prim,
unsigned start,
unsigned count)
{
- const boolean pipeline = draw_need_pipeline(draw, prim);
- const boolean cliptest = !draw->rasterizer->bypass_clipping;
- const boolean shading = !draw->rasterizer->bypass_vs;
struct draw_pt_front_end *frontend = NULL;
struct draw_pt_middle_end *middle = NULL;
+ unsigned opt = 0;
- if (!draw->render)
- return FALSE;
- /*debug_printf("XXXXXXXXXX needs_pipeline = %d\n", pipeline);*/
+ if (!draw->render) {
+ opt |= PT_PIPELINE;
+ }
- /* Overall we do:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
- * - middle -- fetch, shade, cliptest, viewport
- * - pipeline -- the prim pipeline: clipping, wide lines, etc
- * - backend -- the vbuf_render provided by the driver.
- */
+ if (draw_need_pipeline(draw,
+ draw->rasterizer,
+ prim)) {
+ opt |= PT_PIPELINE;
+ }
- if (shading && !draw->use_pt_shaders)
- return FALSE;
+ if (!draw->bypass_clipping) {
+ opt |= PT_CLIPTEST;
+ }
+ if (!draw->rasterizer->bypass_vs) {
+ opt |= PT_SHADE;
+ }
- if (!cliptest && !pipeline && !shading) {
- /* This is the 'passthrough' path:
- */
- /* Fetch user verts, emit hw verts:
- */
+ if (opt)
+ middle = draw->pt.middle.general;
+ else
middle = draw->pt.middle.fetch_emit;
- }
- else if (!cliptest && !shading) {
- /* This is the 'passthrough' path targetting the pipeline backend.
- */
- /* Fetch user verts, emit pipeline verts, run pipeline:
- */
- middle = draw->pt.middle.fetch_pipeline;
- }
- else if (!cliptest && !pipeline) {
- /* Fetch user verts, run vertex shader, emit hw verts:
- */
- middle = draw->pt.middle.fetch_shade_emit;
- }
- else if (!pipeline) {
- /* Even though !pipeline, we have to run it to get clipping. We
- * do know that the pipeline is just the clipping operation, but
- * that probably doesn't help much.
- *
- * This is going to be the most important path for a lot of
- * swtnl cards.
- */
- /* Fetch user verts,
- * run vertex shader,
- * cliptest and viewport trasform
- * if no clipped vertices,
- * emit hw verts
- * else
- * run pipline
- */
- middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit;
- }
- else {
- /* This is what we're currently always doing:
- */
- /* Fetch user verts, run vertex shader, cliptest, run pipeline
- * or
- * Fetch user verts, run vertex shader, run pipeline
- */
- middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit;
- }
- /* If !pipeline, need to make sure we respect the driver's limited
- * capabilites to receive blocks of vertex data and elements.
+ /* May create a short-circuited version of this for small primitives:
*/
-#if 0
- if (!pipeline) {
- unsigned vertex_mode = passthrough;
- unsigned nr_verts = count_vertices( draw, start, count );
- unsigned hw_prim = prim;
-
- if (is_elts(draw)) {
- frontend = draw->pt.front.vcache;
- hw_prim = reduced_prim(prim);
- }
-#if 0
- if (too_many_verts(nr_verts)) {
- /* if (is_verts(draw) && can_split(prim)) {
- draw = draw_arrays_split;
- }
- else */ {
- frontend = draw->pt.front.vcache;
- hw_prim = reduced_prim(prim);
- }
- }
-#endif
-
- if (too_many_elts(count)) {
-
- /* if (is_elts(draw) && can_split(prim)) {
- draw = draw_elts_split;
- }
- else */ {
- frontend = draw->pt.front.vcache;
- hw_prim = reduced_prim(prim);
- }
- }
-
- if (!good_prim(hw_prim)) {
- frontend = draw->pt.front.vcache;
- }
- }
-#else
frontend = draw->pt.front.vcache;
-#endif
- frontend->prepare( frontend, prim, middle );
+ frontend->prepare( frontend, prim, middle, opt );
frontend->run( frontend,
draw_pt_elt_func( draw ),
@@ -190,21 +97,16 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
- draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
- if (!draw->pt.middle.fetch_emit)
- return FALSE;
-
- draw->pt.middle.fetch_pipeline = draw_pt_fetch_pipeline( draw );
- if (!draw->pt.middle.fetch_pipeline)
+ draw->pt.front.vcache = draw_pt_vcache( draw );
+ if (!draw->pt.front.vcache)
return FALSE;
- draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit =
- draw_pt_fetch_pipeline_or_emit( draw );
- if (!draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit)
+ draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
+ if (!draw->pt.middle.fetch_emit)
return FALSE;
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
+ draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
+ if (!draw->pt.middle.general)
return FALSE;
return TRUE;
@@ -213,24 +115,63 @@ boolean draw_pt_init( struct draw_context *draw )
void draw_pt_destroy( struct draw_context *draw )
{
+ if (draw->pt.middle.general) {
+ draw->pt.middle.general->destroy( draw->pt.middle.general );
+ draw->pt.middle.general = NULL;
+ }
+
if (draw->pt.middle.fetch_emit) {
draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit );
draw->pt.middle.fetch_emit = NULL;
}
- if (draw->pt.middle.fetch_pipeline) {
- draw->pt.middle.fetch_pipeline->destroy( draw->pt.middle.fetch_pipeline );
- draw->pt.middle.fetch_pipeline = NULL;
- }
-
- if (draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) {
- draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit->destroy(
- draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit );
- draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = NULL;
- }
-
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
}
}
+
+
+
+/* Lookup table, indexed by a PIPE_PRIM_x value, giving the basic
+ * primitive class (points, lines or triangles) that the primitive
+ * reduces to.  draw_arrays() uses it to detect a change of primitive
+ * class between draws.
+ *
+ * NOTE(review): the entry order presumably follows the PIPE_PRIM_x
+ * enumeration up to PIPE_PRIM_POLYGON -- verify against p_defines.h.
+ *
+ * The table is only ever read, so it is const.
+ */
+static const unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+   PIPE_PRIM_POINTS,
+   PIPE_PRIM_LINES,
+   PIPE_PRIM_LINES,
+   PIPE_PRIM_LINES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES
+};
+
+
+/**
+ * Draw vertex arrays.
+ * Main entrypoint into the drawing module: flushes if the reduced
+ * primitive class differs from the previous draw, then hands the draw
+ * over to the pt (vertex processing) code.
+ * \param draw   the draw context
+ * \param prim   one of PIPE_PRIM_x
+ * \param start  index of first vertex to draw
+ * \param count  number of vertices to draw
+ */
+void
+draw_arrays(struct draw_context *draw, unsigned prim,
+            unsigned start, unsigned count)
+{
+   const unsigned reduced = reduced_prim[prim];
+
+   /* Switching between points/lines/triangles is treated as a state
+    * change, so anything pending must be flushed first.
+    */
+   if (draw->reduced_prim != reduced) {
+      draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+      draw->reduced_prim = reduced;
+   }
+
+   /* The actual drawing happens here: */
+   draw_pt_arrays(draw, prim, start, count);
+}
+
+/**
+ * Look up the edge flag of vertex 'idx'.
+ *
+ * The user-supplied edgeflag array is read as a bitfield holding 32
+ * flags per word.  When no edgeflag array is set, every vertex is
+ * reported as having its edge flag set.
+ *
+ * \param draw  the draw context
+ * \param idx   vertex index
+ * \return non-zero if the edge flag is set
+ */
+boolean draw_pt_get_edgeflag( struct draw_context *draw,
+                              unsigned idx )
+{
+   if (draw->pt.user.edgeflag)
+      /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
+      return (draw->pt.user.edgeflag[idx/32] & (1u << (idx%32))) != 0;
+   else
+      return 1;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 48413b648a6..fd0d158fcf7 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -50,6 +50,12 @@ struct draw_context;
#define DRAW_PT_FLAG_MASK (3<<30)
+#define PT_SHADE 0x1
+#define PT_CLIPTEST 0x2
+#define PT_PIPELINE 0x4
+#define PT_MAX_MIDDLE 0x8
+
+
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
@@ -64,7 +70,8 @@ struct draw_context;
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
unsigned prim,
- struct draw_pt_middle_end * );
+ struct draw_pt_middle_end *,
+ unsigned opt );
void (*run)( struct draw_pt_front_end *,
pt_elt_func elt_func,
@@ -82,15 +89,11 @@ struct draw_pt_front_end {
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
- * Later:
- * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit
- * - fetch, vertex shade, emit
- *
- * Currenly only using the passthrough version.
*/
struct draw_pt_middle_end {
void (*prepare)( struct draw_pt_middle_end *,
- unsigned prim );
+ unsigned prim,
+ unsigned opt );
void (*run)( struct draw_pt_middle_end *,
const unsigned *fetch_elts,
@@ -104,12 +107,9 @@ struct draw_pt_middle_end {
/* The "back end" - supplied by the driver, defined in draw_vbuf.h.
- *
- * Not sure whether to wrap the prim pipeline up as an alternate
- * backend. Would be a win for everything except pure passthrough
- * mode...
*/
struct vbuf_render;
+struct vertex_header;
/* Helper functions.
@@ -118,12 +118,88 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw );
const void *draw_pt_elt_ptr( struct draw_context *draw,
unsigned start );
-/* Implementations:
+/* Frontends:
+ *
+ * Currently only the general-purpose vcache implementation, could add
+ * a special case for tiny vertex buffers.
*/
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
+
+/* Middle-ends:
+ *
+ * Currently one general-purpose case which can do all possibilities,
+ * at the slight expense of creating a vertex_header in some cases
+ * unnecessarily.
+ *
+ * The special case fetch_emit code avoids pipeline vertices
+ * altogether and builds hardware vertices directly from API
+ * vertex_elements.
+ */
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
-struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
+/* More helpers:
+ */
+boolean draw_pt_get_edgeflag( struct draw_context *draw,
+ unsigned idx );
+
+
+/*******************************************************************************
+ * HW vertex emit:
+ */
+struct pt_emit;
+
+void draw_pt_emit_prepare( struct pt_emit *emit,
+ unsigned prim );
+
+void draw_pt_emit( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ const ushort *elts,
+ unsigned count );
+
+void draw_pt_emit_destroy( struct pt_emit *emit );
+
+struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
+
+
+/*******************************************************************************
+ * API vertex fetch:
+ */
+
+struct pt_fetch;
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ unsigned vertex_size );
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts );
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch );
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
+
+/*******************************************************************************
+ * Post-VS: cliptest, rhw, viewport
+ */
+struct pt_post_vs;
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned stride,
+ unsigned count );
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean identity_viewport,
+ boolean opengl );
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
+
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
+
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
index d49770e7b2d..2094c081ed4 100644
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ b/src/gallium/auxiliary/draw/draw_pt_elts.c
@@ -59,7 +59,7 @@ static unsigned elt_vert( const void *elts, unsigned idx )
pt_elt_func draw_pt_elt_func( struct draw_context *draw )
{
- switch (draw->user.eltSize) {
+ switch (draw->pt.user.eltSize) {
case 0: return elt_vert;
case 1: return elt_ubyte;
case 2: return elt_ushort;
@@ -71,9 +71,9 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw )
const void *draw_pt_elt_ptr( struct draw_context *draw,
unsigned start )
{
- const char *elts = draw->user.elts;
+ const char *elts = draw->pt.user.elts;
- switch (draw->user.eltSize) {
+ switch (draw->pt.user.eltSize) {
case 0:
return (const void *)(((const ubyte *)NULL) + start);
case 1:
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
new file mode 100644
index 00000000000..d35329aba0f
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -0,0 +1,252 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+
+#include "cso_cache/cso_cache.h"
+#include "cso_cache/cso_hash.h"
+
+/* State for the "HW vertex emit" helper: converts pipeline vertices to
+ * hardware vertices and passes them to the driver's vbuf_render.
+ */
+struct pt_emit {
+ /* Owning draw context, set once in draw_pt_emit_create(). */
+ struct draw_context *draw;
+
+ /* Translate object chosen by the last draw_pt_emit_prepare(). */
+ struct translate *translate;
+
+ /* Translate objects created so far, indexed by a hash of their key. */
+ struct cso_hash *hash;
+};
+
+/* Size in bytes of the part of a translate_key that is in use: the
+ * full struct minus the space of the elements beyond nr_elements.
+ * NOTE(review): assumes key->element[] has PIPE_MAX_ATTRIBS entries
+ * and is the last member of the struct -- confirm in translate.h.
+ */
+static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+{
+   const unsigned unused_elements = PIPE_MAX_ATTRIBS - key->nr_elements;
+
+   return sizeof(struct translate_key) -
+          sizeof(struct translate_element) * unused_elements;
+}
+
+/* Hash the in-use portion of a translate_key into the value used to
+ * index the translate cache.
+ */
+static INLINE unsigned create_key(struct translate_key *key)
+{
+   return cso_construct_key(key, translate_hash_key_size(key));
+}
+
+/* Return the translate object for 'key', creating it and adding it to
+ * emit->hash the first time the key's hash value is seen.
+ *
+ * Note the lookup goes by hash value only; the stored key is not
+ * compared against 'key'.
+ */
+static struct translate *cached_translate(struct pt_emit *emit,
+                                          struct translate_key *key)
+{
+   const unsigned hash_key = create_key(key);
+   struct cso_hash_iter iter = cso_hash_find(emit->hash, hash_key);
+   struct translate *translate;
+
+   /* Cache hit: hand back the existing object. */
+   if (!cso_hash_iter_is_null(iter))
+      return cso_hash_iter_data(iter);
+
+   /* Cache miss: build a new object and remember it. */
+   translate = translate_create(key);
+   cso_hash_insert(emit->hash, hash_key, translate);
+
+   return translate;
+}
+
+
+/* Release every translate object stored in emit->hash.  The hash table
+ * itself is left in place for the caller to delete.
+ */
+static INLINE void delete_translates(struct pt_emit *emit)
+{
+   struct cso_hash_iter iter;
+
+   for (iter = cso_hash_first_node(emit->hash);
+        !cso_hash_iter_is_null(iter); ) {
+      struct translate *entry = (struct translate*)cso_hash_iter_data(iter);
+
+      /* Step to the next node before releasing the current entry. */
+      iter = cso_hash_iter_next(iter);
+
+      if (entry)
+         entry->release(entry);
+   }
+}
+
+/**
+ * Prepare to emit hardware vertices for primitive 'prim'.
+ *
+ * Sets the primitive on the driver's vbuf_render, queries the hardware
+ * vertex layout and selects (creating if necessary) the translate
+ * object which converts pipeline vertices into that layout.
+ *
+ * \param emit  emit helper from draw_pt_emit_create()
+ * \param prim  primitive type handed to render->set_primitive()
+ */
+void draw_pt_emit_prepare( struct pt_emit *emit,
+                           unsigned prim )
+{
+   struct draw_context *draw = emit->draw;
+   const struct vertex_info *vinfo;
+   unsigned dst_offset;
+   struct translate_key hw_key;
+   unsigned i;
+   boolean ok;
+
+   ok = draw->render->set_primitive(draw->render, prim);
+   if (!ok) {
+      assert(0);
+      return;
+   }
+
+   /* Must do this after set_primitive() above:
+    */
+   vinfo = draw->render->get_vertex_info(draw->render);
+
+   /* The key is hashed and memcmp()'ed as raw bytes below, so unused
+    * elements and padding must hold a known value.
+    */
+   memset(&hw_key, 0, sizeof(hw_key));
+
+   /* Translate from pipeline vertices to hw vertices.
+    */
+   dst_offset = 0;
+   for (i = 0; i < vinfo->num_attribs; i++) {
+      unsigned emit_sz = 0;
+      unsigned src_buffer = 0;
+      unsigned output_format;
+      unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
+
+      switch (vinfo->emit[i]) {
+      case EMIT_4F:
+         output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+         emit_sz = 4 * sizeof(float);
+         break;
+      case EMIT_3F:
+         output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+         emit_sz = 3 * sizeof(float);
+         break;
+      case EMIT_2F:
+         output_format = PIPE_FORMAT_R32G32_FLOAT;
+         emit_sz = 2 * sizeof(float);
+         break;
+      case EMIT_1F:
+         output_format = PIPE_FORMAT_R32_FLOAT;
+         emit_sz = 1 * sizeof(float);
+         break;
+      case EMIT_1F_PSIZE:
+         /* Point size is read from buffer 1, which draw_pt_emit()
+          * points at the rasterizer's point_size.
+          */
+         output_format = PIPE_FORMAT_R32_FLOAT;
+         emit_sz = 1 * sizeof(float);
+         src_buffer = 1;
+         src_offset = 0;
+         break;
+      case EMIT_4UB:
+         output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+         emit_sz = 4 * sizeof(ubyte);
+         break;   /* was missing: fell through into the default case */
+      default:
+         /* Unknown emit type */
+         assert(0);
+         output_format = PIPE_FORMAT_NONE;
+         emit_sz = 0;
+         break;
+      }
+
+      hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      hw_key.element[i].input_buffer = src_buffer;
+      hw_key.element[i].input_offset = src_offset;
+      hw_key.element[i].output_format = output_format;
+      hw_key.element[i].output_offset = dst_offset;
+
+      dst_offset += emit_sz;
+   }
+
+   hw_key.nr_elements = vinfo->num_attribs;
+   hw_key.output_stride = vinfo->size * 4;
+
+   /* Only go to the translate cache when the layout differs from the
+    * one currently selected:
+    */
+   if (!emit->translate ||
+       memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0)
+   {
+      emit->translate = cached_translate(emit, &hw_key);
+   }
+}
+
+
+/**
+ * Convert a block of pipeline vertices to hardware vertices and draw
+ * them through the driver's vbuf_render.
+ *
+ * draw_pt_emit_prepare() must have been called first so that
+ * emit->translate is set.
+ *
+ * \param emit          emit helper from draw_pt_emit_create()
+ * \param vertex_data   source vertices, bound as translate buffer 0
+ * \param vertex_count  number of source vertices to translate
+ * \param stride        stride of the source vertices
+ * \param elts          element (index) list passed to render->draw()
+ * \param count         number of entries in 'elts'
+ */
+void draw_pt_emit( struct pt_emit *emit,
+                   const float (*vertex_data)[4],
+                   unsigned vertex_count,
+                   unsigned stride,
+                   const ushort *elts,
+                   unsigned count )
+{
+   struct draw_context *draw = emit->draw;
+   struct translate *translate = emit->translate;
+   struct vbuf_render *render = draw->render;
+   void *hw_verts;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   /* Size the allocation by the number of vertices translated and
+    * later released (vertex_count), not by the number of elements.
+    */
+   hw_verts = render->allocate_vertices(render,
+                                        (ushort)translate->key.output_stride,
+                                        (ushort)vertex_count);
+   if (!hw_verts) {
+      assert(0);
+      return;
+   }
+
+   /* Buffer 0: the pipeline vertices themselves. */
+   translate->set_buffer(translate,
+                         0,
+                         vertex_data,
+                         stride );
+
+   /* Buffer 1: constant point size (stride 0), used by EMIT_1F_PSIZE. */
+   translate->set_buffer(translate,
+                         1,
+                         &draw->rasterizer->point_size,
+                         0);
+
+   translate->run( translate,
+                   0,
+                   vertex_count,
+                   hw_verts );
+
+   render->draw(render,
+                elts,
+                count);
+
+   render->release_vertices(render,
+                            hw_verts,
+                            translate->key.output_stride,
+                            vertex_count);
+}
+
+
+/**
+ * Create an emit helper bound to 'draw'.
+ *
+ * \return the new helper, or NULL if either the struct or its
+ *         translate cache could not be allocated
+ */
+struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
+{
+   struct pt_emit *emit = CALLOC_STRUCT(pt_emit);
+   if (!emit)
+      return NULL;
+
+   emit->draw = draw;
+   emit->hash = cso_hash_create();
+   if (!emit->hash) {
+      /* Don't hand out a helper whose cache lookups would crash. */
+      FREE(emit);
+      return NULL;
+   }
+
+   return emit;
+}
+
+/**
+ * Destroy an emit helper: release all cached translate objects, delete
+ * the hash table that indexed them, then free the helper itself.
+ */
+void draw_pt_emit_destroy( struct pt_emit *emit )
+{
+   struct cso_hash *hash = emit->hash;
+
+   /* Entries first, then the table holding them. */
+   delete_translates(emit);
+   cso_hash_delete(hash);
+
+   FREE(emit);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
new file mode 100644
index 00000000000..93da811ed81
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -0,0 +1,223 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+
+#include "cso_cache/cso_cache.h"
+#include "cso_cache/cso_hash.h"
+
+/* State for the "API vertex fetch" helper: fetches from the API vertex
+ * elements and vertex buffers into pipeline vertices.
+ */
+struct pt_fetch {
+ /* Owning draw context, set once in draw_pt_fetch_create(). */
+ struct draw_context *draw;
+
+ /* Translate object chosen by the last draw_pt_fetch_prepare(). */
+ struct translate *translate;
+
+ /* vertex_size passed to the last draw_pt_fetch_prepare(); also used
+  * there as the output stride of the translate key.
+  */
+ unsigned vertex_size;
+
+ /* Translate objects created so far, indexed by a hash of their key. */
+ struct cso_hash *hash;
+};
+
+/* Bytes of a translate_key that are in use, i.e. the whole struct less
+ * the elements past nr_elements.
+ * NOTE(review): assumes key->element[] has PIPE_MAX_ATTRIBS entries
+ * and is the last member of the struct -- confirm in translate.h.
+ */
+static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+{
+   const unsigned used = key->nr_elements;
+
+   return sizeof(struct translate_key) -
+          sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - used);
+}
+
+/* Compute the hash value under which a translate_key is stored in the
+ * translate cache; only the in-use part of the key is hashed.
+ */
+static INLINE unsigned create_key(struct translate_key *key)
+{
+   const unsigned nbytes = translate_hash_key_size(key);
+
+   return cso_construct_key(key, nbytes);
+}
+
+/* Find the translate object for 'key' in fetch->hash, creating and
+ * inserting one if the key's hash value has not been seen before.
+ *
+ * The lookup is by hash value only; the stored key is not compared.
+ */
+static struct translate *cached_translate(struct pt_fetch *fetch,
+                                          struct translate_key *key)
+{
+   const unsigned hash_key = create_key(key);
+   struct cso_hash_iter iter = cso_hash_find(fetch->hash, hash_key);
+   struct translate *translate;
+
+   /* Already cached? */
+   if (!cso_hash_iter_is_null(iter))
+      return cso_hash_iter_data(iter);
+
+   /* No: create it and add it to the cache. */
+   translate = translate_create(key);
+   cso_hash_insert(fetch->hash, hash_key, translate);
+
+   return translate;
+}
+
+/* Walk fetch->hash and release every translate object stored in it.
+ * The hash table itself is not deleted here.
+ */
+static INLINE void delete_translates(struct pt_fetch *fetch)
+{
+   struct cso_hash_iter iter;
+
+   for (iter = cso_hash_first_node(fetch->hash);
+        !cso_hash_iter_is_null(iter); ) {
+      struct translate *entry = (struct translate*)cso_hash_iter_data(iter);
+
+      /* Advance before releasing the current entry. */
+      iter = cso_hash_iter_next(iter);
+
+      if (entry)
+         entry->release(entry);
+   }
+}
+
+/* Set up the fetch from API vertex elements & vertex buffers to a
+ * contiguous set of float[4] attributes as required for the
+ * vertex_shader->run_linear() method.
+ *
+ * This is used in all cases except pure passthrough
+ * (draw_pt_fetch_emit.c) which has its own version to translate
+ * directly to hw vertices.
+ *
+ * 'vertex_size' is the size of one output vertex; it is recorded in
+ * the fetch object and used as the output stride.
+ */
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+                            unsigned vertex_size )
+{
+   struct draw_context *draw = fetch->draw;
+   struct translate_key key;
+   unsigned dst_offset = 0;
+   unsigned nr = 0;
+   unsigned i;
+
+   fetch->vertex_size = vertex_size;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Always emit/leave space for a vertex header.
+    *
+    * It's worth considering whether the vertex headers should contain
+    * a pointer to the 'data', rather than having it inline.
+    * Something to look at after we've fully switched over to the pt
+    * paths.
+    *
+    * The first word of the header is copied from an extra input buffer
+    * (index nr_vertex_buffers, one past the API buffers), which is bound
+    * further down to a template header with vertex_id = 0xffff.
+    */
+   key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
+   key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
+   key.element[nr].input_offset = 0;
+   key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
+   key.element[nr].output_offset = dst_offset;
+   dst_offset += 1 * sizeof(float);
+   nr++;
+
+   /* Just leave the clip[] array untouched.
+    */
+   dst_offset += 4 * sizeof(float);
+
+   /* One float[4] output slot per API vertex element. */
+   for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+      const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+
+      key.element[nr].input_format = src->src_format;
+      key.element[nr].input_buffer = src->vertex_buffer_index;
+      key.element[nr].input_offset = src->src_offset;
+      key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      key.element[nr].output_offset = dst_offset;
+
+      dst_offset += 4 * sizeof(float);
+      nr++;
+   }
+
+   assert(dst_offset <= vertex_size);
+
+   key.nr_elements = nr;
+   key.output_stride = vertex_size;
+
+   /* Nothing more to do if the current translate already matches:
+    */
+   if (fetch->translate &&
+       memcmp(&fetch->translate->key, &key, sizeof(key)) == 0)
+      return;
+
+   fetch->translate = cached_translate(fetch, &key);
+
+   {
+      /* Template header used as the source for the header element. */
+      static struct vertex_header vh = { 0, 0, 0, 0xffff };
+      fetch->translate->set_buffer(fetch->translate,
+                                   draw->pt.nr_vertex_buffers,
+                                   &vh,
+                                   0);
+   }
+}
+
+
+
+
+/* Fetch 'count' vertices, selected by the indices in 'elts', from the
+ * currently bound vertex buffers into 'verts', using the translate
+ * object chosen by draw_pt_fetch_prepare().
+ */
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+                        const unsigned *elts,
+                        unsigned count,
+                        char *verts )
+{
+   struct draw_context *draw = fetch->draw;
+   struct translate *translate = fetch->translate;
+   unsigned buf;
+
+   /* Bind every vertex buffer at its start offset with its own pitch. */
+   for (buf = 0; buf < draw->pt.nr_vertex_buffers; buf++) {
+      char *base = (char *)draw->pt.user.vbuffer[buf] +
+                   draw->pt.vertex_buffer[buf].buffer_offset;
+
+      translate->set_buffer(translate,
+                            buf,
+                            base,
+                            draw->pt.vertex_buffer[buf].pitch );
+   }
+
+   translate->run_elts( translate,
+                        elts,
+                        count,
+                        verts );
+}
+
+
+/**
+ * Create a fetch helper bound to 'draw'.
+ *
+ * \return the new helper, or NULL if either the struct or its
+ *         translate cache could not be allocated
+ */
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
+{
+   struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);
+   if (!fetch)
+      return NULL;
+
+   fetch->draw = draw;
+   fetch->hash = cso_hash_create();
+   if (!fetch->hash) {
+      /* Don't hand out a helper whose cache lookups would crash. */
+      FREE(fetch);
+      return NULL;
+   }
+
+   return fetch;
+}
+
+/**
+ * Destroy a fetch helper: release all cached translate objects, delete
+ * the hash table that indexed them, then free the helper itself.
+ */
+void draw_pt_fetch_destroy( struct pt_fetch *fetch )
+{
+   struct cso_hash *hash = fetch->hash;
+
+   /* Entries first, then the table holding them. */
+   delete_translates(fetch);
+   cso_hash_delete(hash);
+
+   FREE(fetch);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 3a26a5d7123..68b2c5b1e3d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -36,6 +36,7 @@
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
+#include "translate/translate.h"
/* The simplest 'middle end' in the new vertex code.
*
@@ -72,105 +73,29 @@
struct fetch_emit_middle_end {
struct draw_pt_middle_end base;
struct draw_context *draw;
-
- struct {
- const ubyte *ptr;
- unsigned pitch;
- void (*fetch)( const void *from, float *attrib);
- void (*emit)( const float *attrib, float **out );
- } fetch[PIPE_MAX_ATTRIBS];
- unsigned nr_fetch;
- unsigned hw_vertex_size;
-};
-
-
-
-static void fetch_R32_FLOAT( const void *from,
- float *attrib )
-{
- float *f = (float *) from;
- attrib[0] = f[0];
- attrib[1] = 0.0;
- attrib[2] = 0.0;
- attrib[3] = 1.0;
-}
-
-
-static void emit_R32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out) += 1;
-}
+ struct translate *translate;
+ const struct vertex_info *vinfo;
-static void emit_R32G32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out) += 2;
-}
+ /* Cache point size somewhere its address won't change:
+ */
+ float point_size;
-static void emit_R32G32B32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out) += 3;
-}
+};
-static void emit_R32G32B32A32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out)[3] = attrib[3];
- (*out) += 4;
-}
-
-
-/**
- * General-purpose fetch from user's vertex arrays, emit to driver's
- * vertex buffer.
- *
- * XXX this is totally temporary.
- */
-static void
-fetch_store_general( struct fetch_emit_middle_end *feme,
- void *out_ptr,
- const unsigned *fetch_elts,
- unsigned count )
-{
- float *out = (float *)out_ptr;
- uint i, j;
-
- for (i = 0; i < count; i++) {
- unsigned elt = fetch_elts[i] & ~DRAW_PT_FLAG_MASK;
-
- for (j = 0; j < feme->nr_fetch; j++) {
- float attrib[4];
- const ubyte *from = (feme->fetch[j].ptr +
- feme->fetch[j].pitch * elt);
-
- feme->fetch[j].fetch( from, attrib );
- feme->fetch[j].emit( attrib, &out );
- }
- }
-}
static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
- unsigned prim )
+ unsigned prim,
+ unsigned opt )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
const struct vertex_info *vinfo;
- unsigned i;
+ unsigned i, dst_offset;
boolean ok;
+ struct translate_key key;
ok = draw->render->set_primitive( draw->render,
@@ -182,49 +107,93 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
/* Must do this after set_primitive() above:
*/
- vinfo = draw->render->get_vertex_info(draw->render);
+ vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render);
+
+
- for (i = 0; i < vinfo->num_attribs; i++) {
- unsigned src_element = vinfo->src_index[i];
- unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index;
-
- feme->fetch[i].ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] +
- draw->vertex_buffer[src_buffer].buffer_offset +
- draw->vertex_element[src_element].src_offset);
+ /* Transform from API vertices to HW vertices, skipping the
+ * pipeline_vertex intermediate step.
+ */
+ dst_offset = 0;
+ memset(&key, 0, sizeof(key));
- feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch;
-
- feme->fetch[i].fetch = draw_get_fetch_func(draw->vertex_element[src_element].src_format);
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]];
+ unsigned emit_sz = 0;
+ unsigned input_format = src->src_format;
+ unsigned input_buffer = src->vertex_buffer_index;
+ unsigned input_offset = src->src_offset;
+ unsigned output_format;
switch (vinfo->emit[i]) {
case EMIT_4F:
- feme->fetch[i].emit = emit_R32G32B32A32_FLOAT;
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
break;
case EMIT_3F:
- feme->fetch[i].emit = emit_R32G32B32_FLOAT;
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
break;
case EMIT_2F:
- feme->fetch[i].emit = emit_R32G32_FLOAT;
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
break;
case EMIT_1F:
- feme->fetch[i].emit = emit_R32_FLOAT;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
break;
case EMIT_1F_PSIZE:
- feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size;
- feme->fetch[i].pitch = 0;
- feme->fetch[i].fetch = fetch_R32_FLOAT;
- feme->fetch[i].emit = emit_R32_FLOAT;
+ input_format = PIPE_FORMAT_R32_FLOAT;
+ input_buffer = draw->pt.nr_vertex_buffers;
+ input_offset = 0;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
break;
default:
assert(0);
- feme->fetch[i].emit = NULL;
- break;
+ output_format = PIPE_FORMAT_NONE;
+ emit_sz = 0;
+ continue;
}
+
+ key.element[i].input_format = input_format;
+ key.element[i].input_buffer = input_buffer;
+ key.element[i].input_offset = input_offset;
+ key.element[i].output_format = output_format;
+ key.element[i].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
}
- feme->nr_fetch = vinfo->num_attribs;
- feme->hw_vertex_size = vinfo->size * 4;
+ key.nr_elements = vinfo->num_attribs;
+ key.output_stride = vinfo->size * 4;
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!feme->translate ||
+ memcmp(&feme->translate->key, &key, sizeof(key)) != 0)
+ {
+ if (feme->translate)
+ feme->translate->release(feme->translate);
+
+ feme->translate = translate_create( &key );
+
+ feme->translate->set_buffer(feme->translate,
+ draw->pt.nr_vertex_buffers,
+ &feme->point_size,
+ 0);
+ }
+
+ feme->point_size = draw->rasterizer->point_size;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ feme->translate->set_buffer(feme->translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].pitch );
+ }
}
@@ -246,7 +215,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
hw_verts = draw->render->allocate_vertices( draw->render,
- (ushort)feme->hw_vertex_size,
+ (ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
if (!hw_verts) {
assert(0);
@@ -256,10 +225,19 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* Single routine to fetch vertices and emit HW verts.
*/
- fetch_store_general( feme,
- hw_verts,
- fetch_elts,
- fetch_count );
+ feme->translate->run_elts( feme->translate,
+ fetch_elts,
+ fetch_count,
+ hw_verts );
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < fetch_count; i++) {
+ debug_printf("\n\nvertex %d:\n", i);
+ draw_dump_emitted_vertex( feme->vinfo,
+ (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
+ }
+ }
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
@@ -272,7 +250,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw->render->release_vertices( draw->render,
hw_verts,
- feme->hw_vertex_size,
+ feme->translate->key.output_stride,
fetch_count );
}
@@ -286,6 +264,11 @@ static void fetch_emit_finish( struct draw_pt_middle_end *middle )
static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+
+ if (feme->translate)
+ feme->translate->release( feme->translate );
+
FREE(middle);
}
@@ -293,6 +276,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
{
struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end );
+ if (fetch_emit == NULL)
+ return NULL;
fetch_emit->base.prepare = fetch_emit_prepare;
fetch_emit->base.run = fetch_emit_run;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
deleted file mode 100644
index a70d129c93c..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
+++ /dev/null
@@ -1,326 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "pipe/p_util.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_pt.h"
-
-/* The simplest 'middle end' in the new vertex code.
- *
- * The responsibilities of a middle end are to:
- * - perform vertex fetch using
- * - draw vertex element/buffer state
- * - a list of fetch indices we received as an input
- * - run the vertex shader
- * - cliptest,
- * - clip coord calculation
- * - viewport transformation
- * - if necessary, run the primitive pipeline, passing it:
- * - a linear array of vertex_header vertices constructed here
- * - a set of draw indices we received as an input
- * - otherwise, drive the hw backend,
- * - allocate space for hardware format vertices
- * - translate the vertex-shader output vertices to hw format
- * - calling the backend draw functions.
- *
- * For convenience, we provide a helper function to drive the hardware
- * backend given similar inputs to those required to run the pipeline.
- *
- * In the case of passthrough mode, many of these actions are disabled
- * or noops, so we end up doing:
- *
- * - perform vertex fetch
- * - drive the hw backend
- *
- * IE, basically just vertex fetch to post-vs-format vertices,
- * followed by a call to the backend helper function.
- */
-
-
-struct fetch_pipeline_middle_end {
- struct draw_pt_middle_end base;
- struct draw_context *draw;
-
- void (*header)( unsigned idx, float **out);
-
- struct {
- const ubyte *ptr;
- unsigned pitch;
- void (*fetch)( const void *from, float *attrib);
- void (*emit)( const float *attrib, float **out );
- } fetch[PIPE_MAX_ATTRIBS];
-
- unsigned nr_fetch;
- unsigned pipeline_vertex_size;
- unsigned prim;
-};
-
-
-#if 0
-static void emit_R32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out) += 1;
-}
-
-static void emit_R32G32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out) += 2;
-}
-
-static void emit_R32G32B32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out) += 3;
-}
-#endif
-static void emit_R32G32B32A32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out)[3] = attrib[3];
- (*out) += 4;
-}
-
-static void header( unsigned idx,
- float **out )
-{
- struct vertex_header *header = (struct vertex_header *) (*out);
-
- header->clipmask = 0;
- header->edgeflag = 1;
- header->pad = 0;
- header->vertex_id = UNDEFINED_VERTEX_ID;
-
- (*out)[1] = 0;
- (*out)[2] = 0;
- (*out)[3] = 0;
- (*out)[3] = 1;
- (*out) += 5;
-}
-
-
-static void header_ef( unsigned idx,
- float **out )
-{
- struct vertex_header *header = (struct vertex_header *) (*out);
-
- /* XXX: need a reset_stipple flag in the vertex header too?
- */
- header->clipmask = 0;
- header->edgeflag = (idx & DRAW_PT_EDGEFLAG) != 0;
- header->pad = 0;
- header->vertex_id = UNDEFINED_VERTEX_ID;
-
- (*out)[1] = 0;
- (*out)[2] = 0;
- (*out)[3] = 0;
- (*out)[3] = 1;
- (*out) += 5;
-}
-
-
-/**
- * General-purpose fetch from user's vertex arrays, emit to driver's
- * vertex buffer.
- *
- * XXX this is totally temporary.
- */
-static void
-fetch_store_general( struct fetch_pipeline_middle_end *fpme,
- void *out_ptr,
- const unsigned *fetch_elts,
- unsigned count )
-{
- float *out = (float *)out_ptr;
- uint i, j;
-
- for (i = 0; i < count; i++) {
- unsigned elt = fetch_elts[i];
-
- fpme->header( elt, &out );
- elt &= ~DRAW_PT_FLAG_MASK;
-
- for (j = 0; j < fpme->nr_fetch; j++) {
- float attrib[4];
- const ubyte *from = (fpme->fetch[j].ptr +
- fpme->fetch[j].pitch * elt);
-
- fpme->fetch[j].fetch( from, attrib );
- fpme->fetch[j].emit( attrib, &out );
- }
- }
-}
-
-
-/* We aren't running a vertex shader, but are running the pipeline.
- * That means the vertices we need to build look like:
- *
- * dw0: vertex header (zero?)
- * dw1: clip coord 0
- * dw2: clip coord 1
- * dw3: clip coord 2
- * dw4: clip coord 4
- * dw5: screen coord 0
- * dw6: screen coord 0
- * dw7: screen coord 0
- * dw8: screen coord 0
- * dw9: other attribs...
- *
- */
-static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
- unsigned prim )
-{
- struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- unsigned i, nr = 0;
-
- fpme->prim = prim;
-
- /* Emit the vertex header and empty clipspace coord field:
- */
- if (draw->user.edgeflag) {
- fpme->header = header_ef;
- }
- else {
- fpme->header = header;
- }
-
-
- /* Need to look at vertex shader inputs (we know it is a
- * passthrough shader, so these define the outputs too). If we
- * were running a shader, we'd still be looking at the inputs at
- * this point.
- */
- for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
-
- fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset);
-
- fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch;
- fpme->fetch[nr].fetch = draw_get_fetch_func( format );
-
- /* Always do this -- somewhat redundant...
- */
- fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT;
- nr++;
- }
-
- fpme->nr_fetch = nr;
- fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
-}
-
-
-
-
-static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
- const unsigned *fetch_elts,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
-{
- struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- char *pipeline_verts;
-
- pipeline_verts = MALLOC( fpme->pipeline_vertex_size *
- fetch_count );
- if (!pipeline_verts) {
- assert(0);
- return;
- }
-
-
- /* Single routine to fetch vertices and emit pipeline verts.
- */
- fetch_store_general( fpme,
- pipeline_verts,
- fetch_elts,
- fetch_count );
-
-
- /* Run the pipeline
- */
- draw_pt_run_pipeline( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fpme->pipeline_vertex_size,
- fetch_count,
- draw_elts,
- draw_count );
-
-
- /* Done -- that was easy, wasn't it:
- */
- FREE( pipeline_verts );
-}
-
-
-
-static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
-{
- /* nothing to do */
-}
-
-static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
-{
- FREE(middle);
-}
-
-
-struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw )
-{
- struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end );
-
- fetch_pipeline->base.prepare = fetch_pipeline_prepare;
- fetch_pipeline->base.run = fetch_pipeline_run;
- fetch_pipeline->base.finish = fetch_pipeline_finish;
- fetch_pipeline->base.destroy = fetch_pipeline_destroy;
-
- fetch_pipeline->draw = draw;
-
- return &fetch_pipeline->base;
-}
-
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 04b3d2c4cfb..f0763dad8d7 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -27,109 +27,73 @@
#include "pipe/p_util.h"
#include "draw/draw_context.h"
-#include "draw/draw_private.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "translate/translate.h"
+
struct fetch_pipeline_middle_end {
struct draw_pt_middle_end base;
struct draw_context *draw;
- struct {
- const ubyte *ptr;
- unsigned pitch;
- void (*fetch)( const void *from, float *attrib);
- void (*emit)( const float *attrib, float **out );
- } fetch[PIPE_MAX_ATTRIBS];
+ struct pt_emit *emit;
+ struct pt_fetch *fetch;
+ struct pt_post_vs *post_vs;
- unsigned nr_fetch;
- unsigned pipeline_vertex_size;
- unsigned hw_vertex_size;
+ unsigned vertex_data_offset;
+ unsigned vertex_size;
unsigned prim;
+ unsigned opt;
};
-#if 0
-static void emit_R32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out) += 1;
-}
-
-static void emit_R32G32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out) += 2;
-}
-
-static void emit_R32G32B32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out) += 3;
-}
-#endif
-static void emit_R32G32B32A32_FLOAT( const float *attrib,
- float **out )
-{
- (*out)[0] = attrib[0];
- (*out)[1] = attrib[1];
- (*out)[2] = attrib[2];
- (*out)[3] = attrib[3];
- (*out) += 4;
-}
static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
- unsigned prim )
+ unsigned prim,
+ unsigned opt )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- unsigned i, nr = 0;
- boolean ok;
- const struct vertex_info *vinfo;
+ struct draw_vertex_shader *vs = draw->vertex_shader;
+
+ /* Add one to num_outputs because the pipeline occasionally tags on
+ * an additional texcoord, eg for AA lines.
+ */
+ unsigned nr = MAX2( vs->info.num_inputs,
+ vs->info.num_outputs + 1 );
fpme->prim = prim;
+ fpme->opt = opt;
- ok = draw->render->set_primitive(draw->render, prim);
- if (!ok) {
- assert(0);
- return;
- }
- /* Must do this after set_primitive() above:
+ /* Always leave room for the vertex header whether we need it or
+ * not. It's hard to get rid of it in particular because of the
+ * viewport code in draw_pt_post_vs.c.
*/
- vinfo = draw->render->get_vertex_info(draw->render);
+ fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
- /* Need to look at vertex shader inputs (we know it is a
- * passthrough shader, so these define the outputs too). If we
- * were running a shader, we'd still be looking at the inputs at
- * this point.
- */
- for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
+
- fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset);
+ draw_pt_fetch_prepare( fpme->fetch,
+ fpme->vertex_size );
- fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch;
- fpme->fetch[nr].fetch = draw_get_fetch_func( format );
+ /* XXX: it's not really gl rasterization rules we care about here,
+ * but gl vs dx9 clip spaces.
+ */
+ draw_pt_post_vs_prepare( fpme->post_vs,
+ draw->bypass_clipping,
+ draw->identity_viewport,
+ draw->rasterizer->gl_rasterization_rules );
+
- /* Always do this -- somewhat redundant...
- */
- fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT;
- nr++;
- }
+ if (!(opt & PT_PIPELINE))
+ draw_pt_emit_prepare( fpme->emit,
+ prim );
+
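+ /* Prepare the shader.  NOTE(review): this comment used to read "No
+ * need to prepare the shader", which contradicts the call below.
+ */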
+ vs->prepare(vs, draw);
- fpme->nr_fetch = nr;
- //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
- fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION;
- fpme->hw_vertex_size = vinfo->size * 4;
}
@@ -144,71 +108,67 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vertex_shader;
- char *pipeline_verts;
+ unsigned opt = fpme->opt;
- pipeline_verts = MALLOC(fpme->pipeline_vertex_size *
- fetch_count);
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count);
if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
assert(0);
return;
}
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run( fpme->fetch,
+ fetch_elts,
+ fetch_count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie a bypass shader,
+ * then the inputs == outputs, and are already in the correct
+ * place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
- /* Shade
+ /* Do we need to run the pipeline?
*/
- shader->prepare(shader, draw);
- if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts,
- fpme->pipeline_vertex_size)) {
- /* Run the pipeline */
- draw_pt_run_pipeline( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fpme->pipeline_vertex_size,
- fetch_count,
- draw_elts,
- draw_count );
- } else {
- unsigned i, j;
- void *hw_verts;
- float *out;
-
- /* XXX: need to flush to get prim_vbuf.c to release its allocation??
- */
- draw_do_flush( draw, DRAW_FLUSH_BACKEND );
-
- hw_verts = draw->render->allocate_vertices(draw->render,
- (ushort)fpme->hw_vertex_size,
- (ushort)fetch_count);
- if (!hw_verts) {
- assert(0);
- return;
- }
-
- out = (float *)hw_verts;
- for (i = 0; i < fetch_count; i++) {
- struct vertex_header *header =
- (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i));
-
- for (j = 0; j < fpme->nr_fetch; j++) {
- float *attrib = header->data[j];
- /*debug_printf("emiting [%f, %f, %f, %f]\n",
- attrib[0], attrib[1],
- attrib[2], attrib[3]);*/
- fpme->fetch[j].emit(attrib, &out);
- }
- }
- /* XXX: Draw arrays path to avoid re-emitting index list again and
- * again.
- */
- draw->render->draw(draw->render,
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size,
draw_elts,
- draw_count);
-
- draw->render->release_vertices(draw->render,
- hw_verts,
- fpme->hw_vertex_size,
- fetch_count);
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
}
@@ -224,20 +184,51 @@ static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+
+ if (fpme->fetch)
+ draw_pt_fetch_destroy( fpme->fetch );
+
+ if (fpme->emit)
+ draw_pt_emit_destroy( fpme->emit );
+
+ if (fpme->post_vs)
+ draw_pt_post_vs_destroy( fpme->post_vs );
+
FREE(middle);
}
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw )
{
- struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end );
+ struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end );
+ if (!fpme)
+ goto fail;
+
+ fpme->base.prepare = fetch_pipeline_prepare;
+ fpme->base.run = fetch_pipeline_run;
+ fpme->base.finish = fetch_pipeline_finish;
+ fpme->base.destroy = fetch_pipeline_destroy;
+
+ fpme->draw = draw;
+
+ fpme->fetch = draw_pt_fetch_create( draw );
+ if (!fpme->fetch)
+ goto fail;
+
+ fpme->post_vs = draw_pt_post_vs_create( draw );
+ if (!fpme->post_vs)
+ goto fail;
+
+ fpme->emit = draw_pt_emit_create( draw );
+ if (!fpme->emit)
+ goto fail;
- fetch_pipeline->base.prepare = fetch_pipeline_prepare;
- fetch_pipeline->base.run = fetch_pipeline_run;
- fetch_pipeline->base.finish = fetch_pipeline_finish;
- fetch_pipeline->base.destroy = fetch_pipeline_destroy;
+ return &fpme->base;
- fetch_pipeline->draw = draw;
+ fail:
+ if (fpme)
+ fetch_pipeline_destroy( &fpme->base );
- return &fetch_pipeline->base;
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
new file mode 100644
index 00000000000..f98e130ed6a
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -0,0 +1,215 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_context.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+
+/* State for the post-vertex-shader step of the pt middle end.
+ * 'run' is selected by draw_pt_post_vs_prepare() and invoked through
+ * draw_pt_post_vs_run().
+ */
+struct pt_post_vs {
+ struct draw_context *draw;
+
+ /* Processes 'count' vertices in place, 'stride' bytes apart.  The
+ * cliptest variant returns TRUE when any vertex has a non-zero
+ * clipmask; the other variants always return FALSE.
+ */
+ boolean (*run)( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+};
+
+
+
+/* Four-component dot product: a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3].
+ */
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
+
+
+
+/* Build the clip mask for one clip-space position.
+ *
+ * Bits 0..5 come from the six hardwired tests against clip[3]; bits
+ * 6..nr-1 are set when dot4(clip, plane[i]) is negative.
+ */
+static INLINE unsigned
+compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+   unsigned result = 0x0;
+   unsigned p;
+
+   /* Hardwired planes first.  Bits 4 and 5 match mesa clipplane
+    * numbering - for now.
+    */
+   result |= (-clip[0] + clip[3] < 0) ? (1<<0) : 0;
+   result |= ( clip[0] + clip[3] < 0) ? (1<<1) : 0;
+   result |= (-clip[1] + clip[3] < 0) ? (1<<2) : 0;
+   result |= ( clip[1] + clip[3] < 0) ? (1<<3) : 0;
+   result |= ( clip[2] + clip[3] < 0) ? (1<<4) : 0;
+   result |= (-clip[2] + clip[3] < 0) ? (1<<5) : 0;
+
+   /* Then whatever planes remain:
+    */
+   for (p = 6; p < nr; p++) {
+      if (dot4(clip, plane[p]) < 0) {
+         result |= (1<<p);
+      }
+   }
+
+   return result;
+}
+
+
+/* The normal case - cliptest, rhw divide, viewport transform.
+ *
+ * Also handle identity viewport here at the expense of a few wasted
+ * instructions.
+ *
+ * For each of the 'count' vertices ('stride' bytes apart) the position
+ * in data[0] is copied into clip[], vertex_id and edgeflag are reset and
+ * the clipmask is computed.  Only vertices whose clipmask is zero get
+ * the divide by w and the viewport mapping applied to data[0].
+ *
+ * Returns TRUE if any vertex ended up with a non-zero clipmask.
+ */
+static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
+                                             struct vertex_header *vertices,
+                                             unsigned count,
+                                             unsigned stride )
+{
+   struct vertex_header *out = vertices;
+   const float *scale = pvs->draw->viewport.scale;
+   const float *trans = pvs->draw->viewport.translate;
+   unsigned clipped = 0;
+   unsigned j;
+
+   if (0) debug_printf("%s\n", __FUNCTION__);
+
+   for (j = 0; j < count; j++) {
+      out->clip[0] = out->data[0][0];
+      out->clip[1] = out->data[0][1];
+      out->clip[2] = out->data[0][2];
+      out->clip[3] = out->data[0][3];
+
+      out->vertex_id = 0xffff;
+      out->edgeflag = 1;
+      out->clipmask = compute_clipmask_gl(out->clip,
+                                          pvs->draw->plane,
+                                          pvs->draw->nr_planes);
+
+      /* OR rather than add: only "was any bit set" matters, and a sum
+       * could in principle wrap back to zero.
+       */
+      clipped |= out->clipmask;
+
+      if (out->clipmask == 0)
+      {
+         /* divide by w */
+         float w = 1.0f / out->data[0][3];
+
+         /* Viewport mapping */
+         out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0];
+         out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1];
+         out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2];
+         out->data[0][3] = w;
+      }
+
+      out = (struct vertex_header *)( (char *)out + stride );
+   }
+
+   return clipped != 0;
+}
+
+
+
+/* Selected when bypass_clipping is set: apply the viewport scale and
+ * translate to x, y and z of data[0] for every vertex, with no cliptest
+ * and no rhw divide.  Always returns FALSE.
+ */
+static boolean post_vs_viewport( struct pt_post_vs *pvs,
+                                 struct vertex_header *vertices,
+                                 unsigned count,
+                                 unsigned stride )
+{
+   const float *scale = pvs->draw->viewport.scale;
+   const float *trans = pvs->draw->viewport.translate;
+   char *cursor = (char *)vertices;
+   unsigned remaining;
+
+   if (0) debug_printf("%s\n", __FUNCTION__);
+
+   for (remaining = count; remaining != 0; remaining--) {
+      struct vertex_header *vert = (struct vertex_header *)cursor;
+      unsigned k;
+
+      /* Viewport mapping only, component by component (x, y, z).
+       */
+      for (k = 0; k < 3; k++) {
+         vert->data[0][k] = vert->data[0][k] * scale[k] + trans[k];
+      }
+
+      cursor += stride;
+   }
+
+   return FALSE;
+}
+
+
+/* Selected when bypass_clipping is set and the viewport is identity:
+ * the vertices are left untouched.  Always returns FALSE.
+ */
+static boolean post_vs_none( struct pt_post_vs *pvs,
+                             struct vertex_header *vertices,
+                             unsigned count,
+                             unsigned stride )
+{
+   if (0) {
+      debug_printf("%s\n", __FUNCTION__);
+   }
+
+   return FALSE;
+}
+
+/* Invoke the currently selected pvs->run variant on 'count' vertices
+ * spaced 'stride' bytes apart, and hand its result back to the caller.
+ */
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+                             struct vertex_header *pipeline_verts,
+                             unsigned count,
+                             unsigned stride )
+{
+   boolean result = pvs->run( pvs, pipeline_verts, count, stride );
+
+   return result;
+}
+
+
+/* Choose the pvs->run variant:
+ *   - clipping not bypassed             -> cliptest + viewport (gl)
+ *   - bypassed, non-identity viewport   -> viewport only
+ *   - bypassed, identity viewport       -> nothing to do
+ *
+ * 'opengl' is currently unused; the check on it is commented out.
+ */
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+                              boolean bypass_clipping,
+                              boolean identity_viewport,
+                              boolean opengl )
+{
+   if (!bypass_clipping) {
+      //if (opengl)
+      pvs->run = post_vs_cliptest_viewport_gl;
+      return;
+   }
+
+   pvs->run = identity_viewport ? post_vs_none : post_vs_viewport;
+}
+
+
+/* Allocate a pt_post_vs with CALLOC_STRUCT and record the owning draw
+ * context.  Returns NULL if the allocation fails.
+ */
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
+{
+   struct pt_post_vs *post_vs = CALLOC_STRUCT( pt_post_vs );
+
+   if (post_vs != NULL) {
+      post_vs->draw = draw;
+   }
+
+   return post_vs;
+}
+
+/* Release a pt_post_vs object by passing it to FREE().
+ */
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs )
+{
+ FREE(pvs);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 107dcfc269c..afcff410438 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -63,6 +63,7 @@ struct vcache_frontend {
static void vcache_flush( struct vcache_frontend *vcache )
{
+ vcache->draw->vcache_flushing = TRUE;
if (vcache->draw_count) {
vcache->middle->run( vcache->middle,
vcache->fetch_elts,
@@ -74,6 +75,7 @@ static void vcache_flush( struct vcache_frontend *vcache )
memset(vcache->in, ~0, sizeof(vcache->in));
vcache->fetch_count = 0;
vcache->draw_count = 0;
+ vcache->draw->vcache_flushing = FALSE;
}
static void vcache_check_flush( struct vcache_frontend *vcache )
@@ -106,7 +108,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache,
unsigned idx,
unsigned mask )
{
- if (mask && draw_get_edgeflag(vcache->draw, idx))
+ if (0 && mask && draw_pt_get_edgeflag(vcache->draw, idx))
return idx | DRAW_PT_EDGEFLAG;
else
return idx;
@@ -116,7 +118,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache,
static unsigned add_reset_stipple( unsigned idx,
unsigned reset )
{
- if (reset)
+ if (0 && reset)
return idx | DRAW_PT_RESET_STIPPLE;
else
return idx;
@@ -128,9 +130,9 @@ static void vcache_triangle( struct vcache_frontend *vcache,
unsigned i1,
unsigned i2 )
{
- vcache_elt(vcache, i0 | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE);
- vcache_elt(vcache, i1 | DRAW_PT_EDGEFLAG);
- vcache_elt(vcache, i2 | DRAW_PT_EDGEFLAG);
+ vcache_elt(vcache, i0 /* | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE */ );
+ vcache_elt(vcache, i1 /* | DRAW_PT_EDGEFLAG */);
+ vcache_elt(vcache, i2 /* | DRAW_PT_EDGEFLAG */);
vcache_check_flush(vcache);
}
@@ -142,11 +144,12 @@ static void vcache_ef_triangle( struct vcache_frontend *vcache,
unsigned i1,
unsigned i2 )
{
+/*
i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 );
i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 );
i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 );
-
i0 = add_reset_stipple( i0, reset_stipple );
+*/
vcache_elt(vcache, i0);
vcache_elt(vcache, i1);
@@ -448,7 +451,8 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
static void vcache_prepare( struct draw_pt_front_end *frontend,
unsigned prim,
- struct draw_pt_middle_end *middle )
+ struct draw_pt_middle_end *middle,
+ unsigned opt )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
@@ -464,7 +468,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
vcache->output_prim = reduced_prim[prim];
vcache->middle = middle;
- middle->prepare( middle, vcache->output_prim );
+ middle->prepare( middle, vcache->output_prim, opt );
}
@@ -486,6 +490,8 @@ static void vcache_destroy( struct draw_pt_front_end *frontend )
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
{
struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
+ if (vcache == NULL)
+ return NULL;
vcache->base.prepare = vcache_prepare;
vcache->base.run = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
index 168036eee84..1446f785c51 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.c
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -72,6 +72,58 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
assert(0);
}
}
+}
+
- assert(vinfo->size * 4 <= MAX_VERTEX_SIZE);
+void
+draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
+{
+ unsigned i, j;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ j = vinfo->src_index[i];
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ debug_printf("EMIT_OMIT:");
+ break;
+ case EMIT_1F:
+ debug_printf("EMIT_1F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ debug_printf("EMIT_1F_PSIZE:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_2F:
+ debug_printf("EMIT_2F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_3F:
+ debug_printf("EMIT_3F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ data += sizeof(float);
+ break;
+ case EMIT_4F:
+ debug_printf("EMIT_4F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_4UB:
+ debug_printf("EMIT_4UB:\t");
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ break;
+ default:
+ assert(0);
+ }
+ debug_printf("\n");
+ }
+ debug_printf("\n");
}
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 65818463cae..6d8bac51384 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -106,5 +106,7 @@ draw_emit_vertex_attr(struct vertex_info *vinfo,
extern void draw_compute_vertex_size(struct vertex_info *vinfo);
+void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
+ const uint8_t *data);
#endif /* DRAW_VERTEX_H */
diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c
deleted file mode 100644
index 730c18bcb34..00000000000
--- a/src/gallium/auxiliary/draw/draw_vertex_cache.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "pipe/p_util.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-void draw_vertex_cache_invalidate( struct draw_context *draw )
-{
- assert(draw->pq.queue_nr == 0);
- assert(draw->vs.queue_nr == 0);
- assert(draw->vcache.referenced == 0);
-
- /* There's an error somewhere in the vcache code that requires this
- * memset. The bug is exposed in q3demo demo001, but probably
- * elsewhere as well. Will track it down later.
- */
- memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
-}
-
-
-/**
- * Check if vertex is in cache, otherwise add it. It won't go through
- * VS yet, not until there is a flush operation or the VS queue fills up.
- *
- * Note that cache entries are basically just two pointers: the first
- * an index into the user's vertex arrays, the second a location in
- * the vertex shader cache for the post-transformed vertex.
- *
- * \return pointer to location of (post-transformed) vertex header in the cache
- */
-static struct vertex_header *get_vertex( struct draw_context *draw,
- unsigned i )
-{
- unsigned slot = (i + (i>>5)) % VCACHE_SIZE;
-
- assert(slot < 32); /* so we don't exceed the bitfield size below */
-
- if (draw->vcache.referenced & (1<<slot))
- {
- /* Cache hit?
- */
- if (draw->vcache.idx[slot].in == i) {
- /*debug_printf("HIT %d %d\n", slot, i);*/
- assert(draw->vcache.idx[slot].out < draw->vs.queue_nr);
- return draw_header_from_block(draw->vs.vertex_cache,
- MAX_VERTEX_ALLOCATION,
- draw->vcache.idx[slot].out);
- }
-
- /* Otherwise a collision
- */
- slot = VCACHE_SIZE + draw->vcache.overflow++;
- /*debug_printf("XXX %d --> %d\n", i, slot);*/
- }
-
- /* Deal with the cache miss:
- */
- {
- unsigned out;
- struct vertex_header *header;
-
- assert(slot < Elements(draw->vcache.idx));
-
- /*debug_printf("NEW %d %d\n", slot, i);*/
- draw->vcache.idx[slot].in = i;
- draw->vcache.idx[slot].out = out = draw->vs.queue_nr++;
- draw->vcache.referenced |= (1 << slot);
-
-
- /* Add to vertex shader queue:
- */
- assert(draw->vs.queue_nr < VS_QUEUE_LENGTH);
-
- header = draw_header_from_block(draw->vs.vertex_cache, MAX_VERTEX_ALLOCATION,
- out);
- draw->vs.elts[out] = i;
- header->clipmask = 0;
- header->edgeflag = draw_get_edgeflag(draw, i);
- header->pad = 0;
- header->vertex_id = UNDEFINED_VERTEX_ID;
-
- /* Need to set the vertex's edge flag here. If we're being called
- * by do_ef_triangle(), that function needs edge flag info!
- */
-
- return draw_header_from_block(draw->vs.vertex_cache,
- MAX_VERTEX_ALLOCATION,
- draw->vcache.idx[slot].out);
- }
-}
-
-
-static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const unsigned *elts = (const unsigned *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const ushort *elts = (const ushort *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const ubyte *elts = (const ubyte *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw )
-{
- unsigned i;
-
- for (i = 0; i < draw->vs.post_nr; i++) {
- struct vertex_header * header =
- draw_header_from_block(draw->vs.vertex_cache,
- MAX_VERTEX_ALLOCATION, i);
- header->vertex_id = UNDEFINED_VERTEX_ID;
- }
-}
-
-
-void draw_vertex_cache_unreference( struct draw_context *draw )
-{
- draw->vcache.referenced = 0;
- draw->vcache.overflow = 0;
-}
-
-
-int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts )
-{
- if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) {
- /* The vs queue is sized so that this can never happen:
- */
- assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH);
- return TRUE;
- }
- else
- return FALSE;
-}
-
-
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
-void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements )
-{
-// draw_statechange( draw );
-
- /* choose the get_vertex() function to use */
- switch (eltSize) {
- case 0:
- draw->vcache.get_vertex = get_vertex;
- break;
- case 1:
- draw->vcache.get_vertex = get_ubyte_elt_vertex;
- break;
- case 2:
- draw->vcache.get_vertex = get_ushort_elt_vertex;
- break;
- case 4:
- draw->vcache.get_vertex = get_uint_elt_vertex;
- break;
- default:
- assert(0);
- }
- draw->user.elts = elements;
- draw->user.eltSize = eltSize;
-}
-
diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c
deleted file mode 100644
index 9041041006e..00000000000
--- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-#define DRAW_DBG 0
-
-
-/**
- * Fetch a float[4] vertex attribute from memory, doing format/type
- * conversion as needed.
- *
- * This is probably needed/dupliocated elsewhere, eg format
- * conversion, texture sampling etc.
- */
-#define FETCH_ATTRIB( NAME, SZ, CVT ) \
-static void \
-fetch_##NAME(const void *ptr, float *attrib) \
-{ \
- static const float defaults[4] = { 0,0,0,1 }; \
- int i; \
- \
- for (i = 0; i < SZ; i++) { \
- attrib[i] = CVT(i); \
- } \
- \
- for (; i < 4; i++) { \
- attrib[i] = defaults[i]; \
- } \
-}
-
-#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i]
-#define CVT_32_FLOAT(i) ((float *) ptr)[i]
-
-#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i]
-#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i]
-#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i]
-
-#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i]
-#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i]
-#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i]
-
-#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f
-#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f
-#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f
-
-#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f
-#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f
-#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f
-
-FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
-FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
-
-FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
-FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
-
-FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
-FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
-
-FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
-FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
-
-FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
-FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
-
-FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
-FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
-
-FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
-FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
-
-FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
-FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
-
-FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
-FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
-
-FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
-FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
-
-FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
-FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
-
-FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
-FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
-
-FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
-FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
-
-FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
-FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
-
-FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
-//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
-
-
-
-static void
-fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
-{
- attrib[2] = CVT_8_UNORM(0);
- attrib[1] = CVT_8_UNORM(1);
- attrib[0] = CVT_8_UNORM(2);
- attrib[3] = CVT_8_UNORM(3);
-}
-
-
-fetch_func draw_get_fetch_func( enum pipe_format format )
-{
-#if 0
- {
- char tmp[80];
- pf_sprint_name(tmp, format);
- debug_printf("%s: %s\n", __FUNCTION__, tmp);
- }
-#endif
-
- switch (format) {
- case PIPE_FORMAT_R64_FLOAT:
- return fetch_R64_FLOAT;
- case PIPE_FORMAT_R64G64_FLOAT:
- return fetch_R64G64_FLOAT;
- case PIPE_FORMAT_R64G64B64_FLOAT:
- return fetch_R64G64B64_FLOAT;
- case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return fetch_R64G64B64A64_FLOAT;
-
- case PIPE_FORMAT_R32_FLOAT:
- return fetch_R32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return fetch_R32G32_FLOAT;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return fetch_R32G32B32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return fetch_R32G32B32A32_FLOAT;
-
- case PIPE_FORMAT_R32_UNORM:
- return fetch_R32_UNORM;
- case PIPE_FORMAT_R32G32_UNORM:
- return fetch_R32G32_UNORM;
- case PIPE_FORMAT_R32G32B32_UNORM:
- return fetch_R32G32B32_UNORM;
- case PIPE_FORMAT_R32G32B32A32_UNORM:
- return fetch_R32G32B32A32_UNORM;
-
- case PIPE_FORMAT_R32_USCALED:
- return fetch_R32_USCALED;
- case PIPE_FORMAT_R32G32_USCALED:
- return fetch_R32G32_USCALED;
- case PIPE_FORMAT_R32G32B32_USCALED:
- return fetch_R32G32B32_USCALED;
- case PIPE_FORMAT_R32G32B32A32_USCALED:
- return fetch_R32G32B32A32_USCALED;
-
- case PIPE_FORMAT_R32_SNORM:
- return fetch_R32_SNORM;
- case PIPE_FORMAT_R32G32_SNORM:
- return fetch_R32G32_SNORM;
- case PIPE_FORMAT_R32G32B32_SNORM:
- return fetch_R32G32B32_SNORM;
- case PIPE_FORMAT_R32G32B32A32_SNORM:
- return fetch_R32G32B32A32_SNORM;
-
- case PIPE_FORMAT_R32_SSCALED:
- return fetch_R32_SSCALED;
- case PIPE_FORMAT_R32G32_SSCALED:
- return fetch_R32G32_SSCALED;
- case PIPE_FORMAT_R32G32B32_SSCALED:
- return fetch_R32G32B32_SSCALED;
- case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return fetch_R32G32B32A32_SSCALED;
-
- case PIPE_FORMAT_R16_UNORM:
- return fetch_R16_UNORM;
- case PIPE_FORMAT_R16G16_UNORM:
- return fetch_R16G16_UNORM;
- case PIPE_FORMAT_R16G16B16_UNORM:
- return fetch_R16G16B16_UNORM;
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- return fetch_R16G16B16A16_UNORM;
-
- case PIPE_FORMAT_R16_USCALED:
- return fetch_R16_USCALED;
- case PIPE_FORMAT_R16G16_USCALED:
- return fetch_R16G16_USCALED;
- case PIPE_FORMAT_R16G16B16_USCALED:
- return fetch_R16G16B16_USCALED;
- case PIPE_FORMAT_R16G16B16A16_USCALED:
- return fetch_R16G16B16A16_USCALED;
-
- case PIPE_FORMAT_R16_SNORM:
- return fetch_R16_SNORM;
- case PIPE_FORMAT_R16G16_SNORM:
- return fetch_R16G16_SNORM;
- case PIPE_FORMAT_R16G16B16_SNORM:
- return fetch_R16G16B16_SNORM;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- return fetch_R16G16B16A16_SNORM;
-
- case PIPE_FORMAT_R16_SSCALED:
- return fetch_R16_SSCALED;
- case PIPE_FORMAT_R16G16_SSCALED:
- return fetch_R16G16_SSCALED;
- case PIPE_FORMAT_R16G16B16_SSCALED:
- return fetch_R16G16B16_SSCALED;
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return fetch_R16G16B16A16_SSCALED;
-
- case PIPE_FORMAT_R8_UNORM:
- return fetch_R8_UNORM;
- case PIPE_FORMAT_R8G8_UNORM:
- return fetch_R8G8_UNORM;
- case PIPE_FORMAT_R8G8B8_UNORM:
- return fetch_R8G8B8_UNORM;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- return fetch_R8G8B8A8_UNORM;
-
- case PIPE_FORMAT_R8_USCALED:
- return fetch_R8_USCALED;
- case PIPE_FORMAT_R8G8_USCALED:
- return fetch_R8G8_USCALED;
- case PIPE_FORMAT_R8G8B8_USCALED:
- return fetch_R8G8B8_USCALED;
- case PIPE_FORMAT_R8G8B8A8_USCALED:
- return fetch_R8G8B8A8_USCALED;
-
- case PIPE_FORMAT_R8_SNORM:
- return fetch_R8_SNORM;
- case PIPE_FORMAT_R8G8_SNORM:
- return fetch_R8G8_SNORM;
- case PIPE_FORMAT_R8G8B8_SNORM:
- return fetch_R8G8B8_SNORM;
- case PIPE_FORMAT_R8G8B8A8_SNORM:
- return fetch_R8G8B8A8_SNORM;
-
- case PIPE_FORMAT_R8_SSCALED:
- return fetch_R8_SSCALED;
- case PIPE_FORMAT_R8G8_SSCALED:
- return fetch_R8G8_SSCALED;
- case PIPE_FORMAT_R8G8B8_SSCALED:
- return fetch_R8G8B8_SSCALED;
- case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return fetch_R8G8B8A8_SSCALED;
-
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return fetch_A8R8G8B8_UNORM;
-
-
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return fetch_B8G8R8A8_UNORM;
-
- case 0:
- return NULL; /* not sure why this is needed */
-
- default:
- /* This can get hit because draw-state-validation is too eager,
- and can jump in here validating stuff before the state tracker has set
- up everything.
- */
- /* assert(0); */
- return NULL;
- }
-}
-
-
-static void
-transpose_4x4( float *out, const float *in )
-{
- /* This can be achieved in 12 sse instructions, plus the final
- * stores I guess. This is probably a bit more than that - maybe
- * 32 or so?
- */
- out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12];
- out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13];
- out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14];
- out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15];
-}
-
-
-
-static void fetch_xyz_rgb( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- int i;
-
- assert(count <= 4);
-
-// debug_printf("%s\n", __FUNCTION__);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
-
- for (i = 0; i < 4; i++) {
- {
- const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
- float *out = &machine->Inputs[0].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
- float *out = &machine->Inputs[1].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
- }
-}
-
-
-
-
-static void fetch_xyz_rgb_st( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- int i;
-
- assert(count <= 4);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
-
- for (i = 0; i < 4; i++) {
- {
- const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
- float *out = &machine->Inputs[0].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
- float *out = &machine->Inputs[1].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[2] + elts[i] * pitch[2]);
- float *out = &machine->Inputs[2].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = 0.0f;
- out[12] = 1.0f;
- }
- }
-}
-
-
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static void generic_vertex_fetch( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
- unsigned attr;
-
- assert(count <= 4);
-
-// debug_printf("%s %d\n", __FUNCTION__, count);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (attr = 0; attr < nr_attrs; attr++) {
-
- const unsigned pitch = draw->vertex_fetch.pitch[attr];
- const ubyte *src = draw->vertex_fetch.src_ptr[attr];
- const fetch_func fetch = draw->vertex_fetch.fetch[attr];
- unsigned i;
- float p[4][4];
-
-
- /* Fetch four attributes for four vertices.
- *
- * Could fetch directly into AOS format, but this is meant to be
- * a prototype for an sse implementation, which would have
- * difficulties doing that.
- */
- for (i = 0; i < count; i++)
- fetch( src + elts[i] * pitch, p[i] );
-
- /* Be nice and zero out any missing vertices:
- */
- for ( ; i < 4; i++)
- p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0;
-
- /* Transpose/swizzle into sse-friendly format. Currently
- * assuming that all vertex shader inputs are float[4], but this
- * isn't true -- if the vertex shader only wants tex0.xy, we
- * could optimize for that.
- *
- * To do so fully without codegen would probably require an
- * excessive number of fetch functions, but we could at least
- * minimize the transpose step:
- */
- transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p );
- }
-}
-
-
-
-void draw_update_vertex_fetch( struct draw_context *draw )
-{
- unsigned nr_attrs, i;
-
-// debug_printf("%s\n", __FUNCTION__);
-
- /* this may happend during context init */
- if (!draw->vertex_shader)
- return;
-
- nr_attrs = draw->vertex_shader->info.num_inputs;
-
- for (i = 0; i < nr_attrs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
-
- draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset;
-
- draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
- draw->vertex_fetch.fetch[i] = draw_get_fetch_func( format );
- }
-
- draw->vertex_fetch.nr_attrs = nr_attrs;
-
- draw->vertex_fetch.fetch_func = generic_vertex_fetch;
-
- switch (nr_attrs) {
- case 2:
- if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT)
- draw->vertex_fetch.fetch_func = fetch_xyz_rgb;
- break;
- case 3:
- if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT)
- draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st;
- break;
- default:
- break;
- }
-
-}
diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c
deleted file mode 100644
index 9d0154c50dc..00000000000
--- a/src/gallium/auxiliary/draw/draw_vf.c
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-
-#include <stddef.h>
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
-#include "rtasm/rtasm_execmem.h"
-
-#include "draw_vf.h"
-
-
-#define DRAW_VF_DBG 0
-
-
-static boolean match_fastpath( struct draw_vertex_fetch *vf,
- const struct draw_vf_fastpath *fp)
-{
- unsigned j;
-
- if (vf->attr_count != fp->attr_count)
- return FALSE;
-
- for (j = 0; j < vf->attr_count; j++)
- if (vf->attr[j].format != fp->attr[j].format ||
- vf->attr[j].inputsize != fp->attr[j].size ||
- vf->attr[j].vertoffset != fp->attr[j].offset)
- return FALSE;
-
- if (fp->match_strides) {
- if (vf->vertex_stride != fp->vertex_stride)
- return FALSE;
-
- for (j = 0; j < vf->attr_count; j++)
- if (vf->attr[j].inputstride != fp->attr[j].stride)
- return FALSE;
- }
-
- return TRUE;
-}
-
-static boolean search_fastpath_emit( struct draw_vertex_fetch *vf )
-{
- struct draw_vf_fastpath *fp = vf->fastpath;
-
- for ( ; fp ; fp = fp->next) {
- if (match_fastpath(vf, fp)) {
- vf->emit = fp->func;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-void draw_vf_register_fastpath( struct draw_vertex_fetch *vf,
- boolean match_strides )
-{
- struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath);
- unsigned i;
-
- fastpath->vertex_stride = vf->vertex_stride;
- fastpath->attr_count = vf->attr_count;
- fastpath->match_strides = match_strides;
- fastpath->func = vf->emit;
- fastpath->attr = (struct draw_vf_attr_type *)
- MALLOC(vf->attr_count * sizeof(fastpath->attr[0]));
-
- for (i = 0; i < vf->attr_count; i++) {
- fastpath->attr[i].format = vf->attr[i].format;
- fastpath->attr[i].stride = vf->attr[i].inputstride;
- fastpath->attr[i].size = vf->attr[i].inputsize;
- fastpath->attr[i].offset = vf->attr[i].vertoffset;
- }
-
- fastpath->next = vf->fastpath;
- vf->fastpath = fastpath;
-}
-
-
-
-
-/***********************************************************************
- * Build codegen functions or return generic ones:
- */
-static void choose_emit_func( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *dest)
-{
- vf->emit = NULL;
-
- /* Does this match an existing (hardwired, codegen or known-bad)
- * fastpath?
- */
- if (search_fastpath_emit(vf)) {
- /* Use this result. If it is null, then it is already known
- * that the current state will fail for codegen and there is no
- * point trying again.
- */
- }
- else if (vf->codegen_emit) {
- vf->codegen_emit( vf );
- }
-
- if (!vf->emit) {
- draw_vf_generate_hardwired_emit(vf);
- }
-
- /* Otherwise use the generic version:
- */
- if (!vf->emit)
- vf->emit = draw_vf_generic_emit;
-
- vf->emit( vf, count, dest );
-}
-
-
-
-
-
-/***********************************************************************
- * Public entrypoints, mostly dispatch to the above:
- */
-
-
-
-static unsigned
-draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
- const struct draw_vf_attr_map *map,
- unsigned nr,
- unsigned vertex_stride )
-{
- unsigned offset = 0;
- unsigned i, j;
-
- assert(nr < PIPE_MAX_ATTRIBS);
-
- for (j = 0, i = 0; i < nr; i++) {
- const unsigned format = map[i].format;
- if (format == DRAW_EMIT_PAD) {
-#if (DRAW_VF_DBG)
- debug_printf("%d: pad %d, offset %d\n", i,
- map[i].offset, offset);
-#endif
-
- offset += map[i].offset;
-
- }
- else {
- vf->attr[j].attrib = map[i].attrib;
- vf->attr[j].format = format;
- vf->attr[j].insert = draw_vf_format_info[format].insert;
- vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize;
- vf->attr[j].vertoffset = offset;
- vf->attr[j].isconst = draw_vf_format_info[format].isconst;
- if(vf->attr[j].isconst)
- memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize);
-
-#if (DRAW_VF_DBG)
- debug_printf("%d: %s, offset %d\n", i,
- draw_vf_format_info[format].name,
- vf->attr[j].vertoffset);
-#endif
-
- offset += draw_vf_format_info[format].attrsize;
- j++;
- }
- }
-
- vf->attr_count = j;
- vf->vertex_stride = vertex_stride ? vertex_stride : offset;
- vf->emit = choose_emit_func;
-
- assert(vf->vertex_stride >= offset);
- return vf->vertex_stride;
-}
-
-
-void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
- const struct vertex_info *vinfo,
- float point_size )
-{
- unsigned i, j;
- struct draw_vf_attr *a = vf->attr;
- struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS];
- unsigned count = 0; /* for debug/sanity */
- unsigned nr_attrs = 0;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- /* no-op */
- break;
- case EMIT_1F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_1F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count++;
- break;
- case EMIT_1F_PSIZE:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_1F_CONST;
- attrs[nr_attrs].offset = 0;
- attrs[nr_attrs].data.f[0] = point_size;
- nr_attrs++;
- count++;
- break;
- case EMIT_2F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_2F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 2;
- break;
- case EMIT_3F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_3F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 3;
- break;
- case EMIT_4F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_4F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 4;
- break;
- case EMIT_4UB:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 1;
- break;
- default:
- assert(0);
- }
- }
-
- assert(count == vinfo->size);
-
- draw_vf_set_vertex_attributes(vf,
- attrs,
- nr_attrs,
- vinfo->size * sizeof(float) );
-
- for (j = 0; j < vf->attr_count; j++) {
- a[j].inputsize = 4;
- a[j].do_insert = a[j].insert[4 - 1];
- if(a[j].isconst) {
- a[j].inputptr = a[j].data;
- a[j].inputstride = 0;
- }
- }
-}
-
-
-#if 0
-/* Set attribute pointers, adjusted for start position:
- */
-void draw_vf_set_sources( struct draw_vertex_fetch *vf,
- GLvector4f * const sources[],
- unsigned start )
-{
- struct draw_vf_attr *a = vf->attr;
- unsigned j;
-
- for (j = 0; j < vf->attr_count; j++) {
- const GLvector4f *vptr = sources[a[j].attrib];
-
- if ((a[j].inputstride != vptr->stride) ||
- (a[j].inputsize != vptr->size))
- vf->emit = choose_emit_func;
-
- a[j].inputstride = vptr->stride;
- a[j].inputsize = vptr->size;
- a[j].do_insert = a[j].insert[vptr->size - 1];
- a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride;
- }
-}
-#endif
-
-
-/**
- * Emit a vertex to dest.
- */
-void draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
- struct vertex_header *vertex,
- void *dest )
-{
- struct draw_vf_attr *a = vf->attr;
- unsigned j;
-
- for (j = 0; j < vf->attr_count; j++) {
- if (!a[j].isconst) {
- a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0];
- a[j].inputstride = 0; /* XXX: one-vertex-max ATM */
- }
- }
-
- vf->emit( vf, 1, (uint8_t*) dest );
-}
-
-
-
-struct draw_vertex_fetch *draw_vf_create( void )
-{
- struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch);
- unsigned i;
-
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
- vf->attr[i].vf = vf;
-
- vf->identity[0] = 0.0;
- vf->identity[1] = 0.0;
- vf->identity[2] = 0.0;
- vf->identity[3] = 1.0;
-
- vf->codegen_emit = NULL;
-
-#ifdef USE_SSE_ASM
- if (!GETENV("GALLIUM_NO_CODEGEN"))
- vf->codegen_emit = draw_vf_generate_sse_emit;
-#endif
-
- return vf;
-}
-
-
-void draw_vf_destroy( struct draw_vertex_fetch *vf )
-{
- struct draw_vf_fastpath *fp, *tmp;
-
- for (fp = vf->fastpath ; fp ; fp = tmp) {
- tmp = fp->next;
- FREE(fp->attr);
-
- /* KW: At the moment, fp->func is constrained to be allocated by
- * rtasm_exec_alloc(), as the hardwired fastpaths in
- * t_vertex_generic.c are handled specially. It would be nice
- * to unify them, but this probably won't change until this
- * module gets another overhaul.
- */
- //rtasm_exec_free((void *) fp->func);
- FREE(fp);
- }
-
- vf->fastpath = NULL;
- FREE(vf);
-}
diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h
deleted file mode 100644
index 0ef98d6257c..00000000000
--- a/src/gallium/auxiliary/draw/draw_vf.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright 2008 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * Vertex fetch/store/convert code. This functionality is used in two places:
- * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
- * arrays and convert to format needed by vertex shaders.
- * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
- * (which is the organization used throughout the draw/prim pipeline) to
- * hardware-specific formats and emit into hardware vertex buffers.
- *
- *
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#ifndef DRAW_VF_H
-#define DRAW_VF_H
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-
-#include "draw_vertex.h"
-#include "draw_private.h" /* for vertex_header */
-
-
-enum draw_vf_attr_format {
- DRAW_EMIT_1F,
- DRAW_EMIT_2F,
- DRAW_EMIT_3F,
- DRAW_EMIT_4F,
- DRAW_EMIT_3F_XYW, /**< for projective texture */
- DRAW_EMIT_1UB_1F, /**< for fog coordinate */
- DRAW_EMIT_3UB_3F_RGB, /**< for specular color */
- DRAW_EMIT_3UB_3F_BGR, /**< for specular color */
- DRAW_EMIT_4UB_4F_RGBA, /**< for color */
- DRAW_EMIT_4UB_4F_BGRA, /**< for color */
- DRAW_EMIT_4UB_4F_ARGB, /**< for color */
- DRAW_EMIT_4UB_4F_ABGR, /**< for color */
- DRAW_EMIT_1F_CONST,
- DRAW_EMIT_2F_CONST,
- DRAW_EMIT_3F_CONST,
- DRAW_EMIT_4F_CONST,
- DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */
- DRAW_EMIT_MAX
-};
-
-struct draw_vf_attr_map
-{
- /** Input attribute number */
- unsigned attrib;
-
- enum draw_vf_attr_format format;
-
- unsigned offset;
-
- /**
- * Constant data for DRAW_EMIT_*_CONST
- */
- union {
- uint8_t ub[4];
- float f[4];
- } data;
-};
-
-struct draw_vertex_fetch;
-
-
-
-#if 0
-unsigned
-draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
- const struct draw_vf_attr_map *map,
- unsigned nr,
- unsigned vertex_stride );
-#endif
-
-void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
- const struct vertex_info *vinfo,
- float point_size );
-
-#if 0
-void
-draw_vf_set_sources( struct draw_vertex_fetch *vf,
- GLvector4f * const attrib[],
- unsigned start );
-#endif
-
-void
-draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
- struct vertex_header *vertex,
- void *dest );
-
-struct draw_vertex_fetch *
-draw_vf_create( void );
-
-void
-draw_vf_destroy( struct draw_vertex_fetch *vf );
-
-
-
-/***********************************************************************
- * Internal functions and structs:
- */
-
-struct draw_vf_attr;
-
-
-typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a,
- uint8_t *v,
- const float *in );
-
-typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *dest );
-
-
-
-/**
- * Describes how to convert/move a vertex attribute from a vertex
- * array to a vertex structure.
- */
-struct draw_vf_attr
-{
- struct draw_vertex_fetch *vf;
-
- unsigned format;
- unsigned inputsize;
- unsigned inputstride;
- unsigned vertoffset; /**< position of the attrib in the vertex struct */
-
- boolean isconst; /**< read from const data below */
- uint8_t data[16];
-
- unsigned attrib; /**< which vertex attrib (0=position, etc) */
- unsigned vertattrsize; /**< size of the attribute in bytes */
-
- uint8_t *inputptr;
- const draw_vf_insert_func *insert;
- draw_vf_insert_func do_insert;
-};
-
-struct draw_vertex_fetch
-{
- struct draw_vf_attr attr[PIPE_MAX_ATTRIBS];
- unsigned attr_count;
- unsigned vertex_stride;
-
- draw_vf_emit_func emit;
-
- /* Parameters and constants for codegen:
- */
- float identity[4];
-
- struct draw_vf_fastpath *fastpath;
-
- void (*codegen_emit)( struct draw_vertex_fetch *vf );
-};
-
-
-struct draw_vf_attr_type {
- unsigned format;
- unsigned size;
- unsigned stride;
- unsigned offset;
-};
-
-/** XXX this could be moved into draw_vf.c */
-struct draw_vf_fastpath {
- unsigned vertex_stride;
- unsigned attr_count;
- boolean match_strides;
-
- struct draw_vf_attr_type *attr;
-
- draw_vf_emit_func func;
- struct draw_vf_fastpath *next;
-};
-
-
-void
-draw_vf_register_fastpath( struct draw_vertex_fetch *vtx,
- boolean match_strides );
-
-void
-draw_vf_generic_emit( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *v );
-
-void
-draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf );
-
-void
-draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf );
-
-
-/** XXX this type and function could probably be moved into draw_vf.c */
-struct draw_vf_format_info {
- const char *name;
- draw_vf_insert_func insert[4];
- const unsigned attrsize;
- const boolean isconst;
-};
-
-extern const struct draw_vf_format_info
-draw_vf_format_info[DRAW_EMIT_MAX];
-
-
-#endif
diff --git a/src/gallium/auxiliary/draw/draw_vf_generic.c b/src/gallium/auxiliary/draw/draw_vf_generic.c
deleted file mode 100644
index 7a60a9db9c3..00000000000
--- a/src/gallium/auxiliary/draw/draw_vf_generic.c
+++ /dev/null
@@ -1,585 +0,0 @@
-
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-
-#include "draw_vf.h"
-
-
-
-static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
-}
-
-static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = 1;
-}
-
-static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = 0;
- out[3] = 1;
-}
-
-static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
- out[2] = 0;
- out[3] = 1;
-}
-
-static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[3];
-}
-
-static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- (void) a; (void) v; (void) in;
- assert(0);
-}
-
-static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
-}
-
-static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = 0;
-}
-
-static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
- out[2] = 0;
-}
-
-
-static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
-}
-
-static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
-}
-
-static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
-}
-
-static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- (void) a; (void) v; (void) in;
-}
-
-static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
-}
-
-static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[2] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- v[1] = 0;
- v[2] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
-}
-
-static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[0] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- v[1] = 0;
- v[0] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
-}
-
-static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- v[3] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- v[2] = 0x00;
- v[3] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
-}
-
-static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- v[1] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- v[2] = 0x00;
- v[1] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
-}
-
-static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[2] = 0;
-}
-
-static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- v[1] = 0;
- v[2] = 0;
-}
-
-static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
-}
-
-static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[0] = 0;
-}
-
-static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- v[1] = 0;
- v[0] = 0;
-}
-
-
-static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
-}
-
-
-const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] =
-{
- { "1f",
- { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
- sizeof(float), FALSE },
-
- { "2f",
- { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
- 2 * sizeof(float), FALSE },
-
- { "3f",
- { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
- 3 * sizeof(float), FALSE },
-
- { "4f",
- { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
- 4 * sizeof(float), FALSE },
-
- { "3f_xyw",
- { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err,
- insert_3f_xyw_4 },
- 3 * sizeof(float), FALSE },
-
- { "1ub_1f",
- { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 },
- sizeof(uint8_t), FALSE },
-
- { "3ub_3f_rgb",
- { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3,
- insert_3ub_3f_rgb_3 },
- 3 * sizeof(uint8_t), FALSE },
-
- { "3ub_3f_bgr",
- { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3,
- insert_3ub_3f_bgr_3 },
- 3 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_rgba",
- { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3,
- insert_4ub_4f_rgba_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_bgra",
- { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3,
- insert_4ub_4f_bgra_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_argb",
- { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3,
- insert_4ub_4f_argb_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_abgr",
- { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3,
- insert_4ub_4f_abgr_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "1f_const",
- { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
- sizeof(float), TRUE },
-
- { "2f_const",
- { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
- 2 * sizeof(float), TRUE },
-
- { "3f_const",
- { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
- 3 * sizeof(float), TRUE },
-
- { "4f_const",
- { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
- 4 * sizeof(float), TRUE },
-
- { "pad",
- { NULL, NULL, NULL, NULL },
- 0, FALSE },
-
-};
-
-
-
-
-/***********************************************************************
- * Hardwired fastpaths for emitting whole vertices or groups of
- * vertices
- */
-#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \
-static void NAME( struct draw_vertex_fetch *vf, \
- unsigned count, \
- uint8_t *v ) \
-{ \
- struct draw_vf_attr *a = vf->attr; \
- unsigned i; \
- \
- for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \
- if (NR > 0) { \
- F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \
- a[0].inputptr += a[0].inputstride; \
- } \
- \
- if (NR > 1) { \
- F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \
- a[1].inputptr += a[1].inputstride; \
- } \
- \
- if (NR > 2) { \
- F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \
- a[2].inputptr += a[2].inputstride; \
- } \
- \
- if (NR > 3) { \
- F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \
- a[3].inputptr += a[3].inputstride; \
- } \
- \
- if (NR > 4) { \
- F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \
- a[4].inputptr += a[4].inputstride; \
- } \
- } \
-}
-
-
-#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \
- insert_null, insert_null, NAME)
-
-#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \
- insert_null, NAME)
-
-#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \
- insert_null, NAME)
-
-
-EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4)
-
-EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2)
-
-EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2)
-
-
-/* Use the codegen paths to select one of a number of hardwired
- * fastpaths.
- */
-void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf )
-{
- draw_vf_emit_func func = NULL;
-
- /* Does it fit a hardwired fastpath? Help! this is growing out of
- * control!
- */
- switch (vf->attr_count) {
- case 2:
- if (vf->attr[0].do_insert == insert_3f_3 &&
- vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- func = emit_xyz3_rgba4;
- }
- break;
- case 3:
- if (vf->attr[2].do_insert == insert_2f_2) {
- if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- if (vf->attr[0].do_insert == insert_4f_4)
- func = emit_xyzw4_rgba4_st2;
- }
- }
- break;
- case 4:
- if (vf->attr[2].do_insert == insert_2f_2 &&
- vf->attr[3].do_insert == insert_2f_2) {
- if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- if (vf->attr[0].do_insert == insert_4f_4)
- func = emit_xyzw4_rgba4_st2_st2;
- }
- }
- break;
- }
-
- vf->emit = func;
-}
-
-/***********************************************************************
- * Generic (non-codegen) functions for whole vertices or groups of
- * vertices
- */
-
-void draw_vf_generic_emit( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *v )
-{
- struct draw_vf_attr *a = vf->attr;
- const unsigned attr_count = vf->attr_count;
- const unsigned stride = vf->vertex_stride;
- unsigned i, j;
-
- for (i = 0 ; i < count ; i++, v += stride) {
- for (j = 0; j < attr_count; j++) {
- float *in = (float *)a[j].inputptr;
- a[j].inputptr += a[j].inputstride;
- a[j].do_insert( &a[j], v + a[j].vertoffset, in );
- }
- }
-}
-
-
diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c
deleted file mode 100644
index aff4ffd985c..00000000000
--- a/src/gallium/auxiliary/draw/draw_vf_sse.c
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-
-#include "pipe/p_compiler.h"
-#include "util/u_simple_list.h"
-
-#include "draw_vf.h"
-
-
-#if defined(USE_SSE_ASM)
-
-#include "rtasm/rtasm_cpu.h"
-#include "rtasm/rtasm_x86sse.h"
-
-
-#define X 0
-#define Y 1
-#define Z 2
-#define W 3
-
-
-struct x86_program {
- struct x86_function func;
-
- struct draw_vertex_fetch *vf;
- boolean inputs_safe;
- boolean outputs_safe;
- boolean have_sse2;
-
- struct x86_reg identity;
- struct x86_reg chan0;
-};
-
-
-static struct x86_reg get_identity( struct x86_program *p )
-{
- return p->identity;
-}
-
-static void emit_load4f_4( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movups(&p->func, dest, arg0);
-}
-
-static void emit_load4f_3( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
- sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) );
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load4f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Initialize from identity, then pull in low two words:
- */
- sse_movups(&p->func, dest, get_identity(p));
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load4f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Pull in low word, then swizzle in identity */
- sse_movss(&p->func, dest, arg0);
- sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
-}
-
-
-
-static void emit_load3f_3( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Over-reads by 1 dword - potential SEGV if input is a vertex
- * array.
- */
- if (p->inputs_safe) {
- sse_movups(&p->func, dest, arg0);
- }
- else {
- /* c 0 0 0
- * c c c c
- * a b c c
- */
- sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
- sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X));
- sse_movlps(&p->func, dest, arg0);
- }
-}
-
-static void emit_load3f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_2(p, dest, arg0);
-}
-
-static void emit_load3f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_1(p, dest, arg0);
-}
-
-static void emit_load2f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load2f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_1(p, dest, arg0);
-}
-
-static void emit_load1f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movss(&p->func, dest, arg0);
-}
-
-static void (*load[4][4])( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 ) = {
- { emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1 },
-
- { emit_load2f_1,
- emit_load2f_2,
- emit_load2f_2,
- emit_load2f_2 },
-
- { emit_load3f_1,
- emit_load3f_2,
- emit_load3f_3,
- emit_load3f_3 },
-
- { emit_load4f_1,
- emit_load4f_2,
- emit_load4f_3,
- emit_load4f_4 }
-};
-
-static void emit_load( struct x86_program *p,
- struct x86_reg dest,
- unsigned sz,
- struct x86_reg src,
- unsigned src_sz)
-{
- load[sz-1][src_sz-1](p, dest, src);
-}
-
-static void emit_store4f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movups(&p->func, dest, arg0);
-}
-
-static void emit_store3f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- if (p->outputs_safe) {
- /* Emit the extra dword anyway. This may hurt writecombining,
- * may cause other problems.
- */
- sse_movups(&p->func, dest, arg0);
- }
- else {
- /* Alternate strategy - emit two, shuffle, emit one.
- */
- sse_movlps(&p->func, dest, arg0);
- sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(&p->func, x86_make_disp(dest,8), arg0);
- }
-}
-
-static void emit_store2f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_store1f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movss(&p->func, dest, arg0);
-}
-
-
-static void (*store[4])( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 ) =
-{
- emit_store1f,
- emit_store2f,
- emit_store3f,
- emit_store4f
-};
-
-static void emit_store( struct x86_program *p,
- struct x86_reg dest,
- unsigned sz,
- struct x86_reg temp )
-
-{
- store[sz-1](p, dest, temp);
-}
-
-static void emit_pack_store_4ub( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg temp )
-{
- /* Scale by 255.0
- */
- sse_mulps(&p->func, temp, p->chan0);
-
- if (p->have_sse2) {
- sse2_cvtps2dq(&p->func, temp, temp);
- sse2_packssdw(&p->func, temp, temp);
- sse2_packuswb(&p->func, temp, temp);
- sse_movss(&p->func, dest, temp);
- }
- else {
- struct x86_reg mmx0 = x86_make_reg(file_MMX, 0);
- struct x86_reg mmx1 = x86_make_reg(file_MMX, 1);
- sse_cvtps2pi(&p->func, mmx0, temp);
- sse_movhlps(&p->func, temp, temp);
- sse_cvtps2pi(&p->func, mmx1, temp);
- mmx_packssdw(&p->func, mmx0, mmx1);
- mmx_packuswb(&p->func, mmx0, mmx0);
- mmx_movd(&p->func, dest, mmx0);
- }
-}
-
-static int get_offset( const void *a, const void *b )
-{
- return (const char *)b - (const char *)a;
-}
-
-/* Not much happens here. Eventually use this function to try and
- * avoid saving/reloading the source pointers each vertex (if some of
- * them can fit in registers).
- */
-static void get_src_ptr( struct x86_program *p,
- struct x86_reg srcREG,
- struct x86_reg vfREG,
- struct draw_vf_attr *a )
-{
- struct draw_vertex_fetch *vf = p->vf;
- struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
-
- /* Load current a[j].inputptr
- */
- x86_mov(&p->func, srcREG, ptr_to_src);
-}
-
-static void update_src_ptr( struct x86_program *p,
- struct x86_reg srcREG,
- struct x86_reg vfREG,
- struct draw_vf_attr *a )
-{
- if (a->inputstride) {
- struct draw_vertex_fetch *vf = p->vf;
- struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
-
- /* add a[j].inputstride (hardcoded value - could just as easily
- * pull the stride value from memory each time).
- */
- x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));
-
- /* save new value of a[j].inputptr
- */
- x86_mov(&p->func, ptr_to_src, srcREG);
- }
-}
-
-
-/* Lots of hardcoding
- *
- * EAX -- pointer to current output vertex
- * ECX -- pointer to current attribute
- *
- */
-static boolean build_vertex_emit( struct x86_program *p )
-{
- struct draw_vertex_fetch *vf = p->vf;
- unsigned j = 0;
-
- struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX);
- struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX);
- struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
- struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI);
- struct x86_reg temp = x86_make_reg(file_XMM, 0);
- uint8_t *fixup, *label;
-
- /* Push a few regs?
- */
- x86_push(&p->func, countEBP);
- x86_push(&p->func, vfESI);
-
-
- /* Get vertex count, compare to zero
- */
- x86_xor(&p->func, srcECX, srcECX);
- x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2));
- x86_cmp(&p->func, countEBP, srcECX);
- fixup = x86_jcc_forward(&p->func, cc_E);
-
- /* Initialize destination register.
- */
- x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3));
-
- /* Move argument 1 (vf) into a reg:
- */
- x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1));
-
-
- /* always load, needed or not:
- */
- sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0])));
-
- /* Note address for loop jump */
- label = x86_get_label(&p->func);
-
- /* Emit code for each of the attributes. Currently routes
- * everything through SSE registers, even when it might be more
- * efficient to stick with regular old x86. No optimization or
- * other tricks - enough new ground to cover here just getting
- * things working.
- */
- while (j < vf->attr_count) {
- struct draw_vf_attr *a = &vf->attr[j];
- struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset);
-
- /* Now, load an XMM reg from src, perhaps transform, then save.
- * Could be shortcircuited in specific cases:
- */
- switch (a->format) {
- case DRAW_EMIT_1F:
- case DRAW_EMIT_1F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 1, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_2F:
- case DRAW_EMIT_2F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 2, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_3F:
- case DRAW_EMIT_3F_CONST:
- /* Potentially the worst case - hardcode 2+1 copying:
- */
- if (0) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 3, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- }
- else {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 2, temp);
- if (a->inputsize > 2) {
- emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1);
- emit_store(p, x86_make_disp(dest,8), 1, temp);
- }
- else {
- sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p));
- }
- update_src_ptr(p, srcECX, vfESI, a);
- }
- break;
- case DRAW_EMIT_4F:
- case DRAW_EMIT_4F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 4, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_3F_XYW:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z));
- emit_store(p, dest, 3, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
-
- case DRAW_EMIT_1UB_1F:
- /* Test for PAD3 + 1UB:
- */
- if (j > 0 &&
- a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3)
- {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X));
- emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */
- update_src_ptr(p, srcECX, vfESI, a);
- }
- else {
- debug_printf("Can't emit 1ub %x %x %d\n",
- a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize );
- return FALSE;
- }
- break;
- case DRAW_EMIT_3UB_3F_RGB:
- case DRAW_EMIT_3UB_3F_BGR:
- /* Test for 3UB + PAD1:
- */
- if (j == vf->attr_count - 1 ||
- a[1].vertoffset >= a->vertoffset + 4) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- if (a->format == DRAW_EMIT_3UB_3F_BGR)
- sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- }
- /* Test for 3UB + 1UB:
- */
- else if (j < vf->attr_count - 1 &&
- a[1].format == DRAW_EMIT_1UB_1F &&
- a[1].vertoffset == a->vertoffset + 3) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- update_src_ptr(p, srcECX, vfESI, a);
-
- /* Make room for incoming value:
- */
- sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
-
- get_src_ptr(p, srcECX, vfESI, &a[1]);
- emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize);
- update_src_ptr(p, srcECX, vfESI, &a[1]);
-
- /* Rearrange and possibly do BGR conversion:
- */
- if (a->format == DRAW_EMIT_3UB_3F_BGR)
- sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
- else
- sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X));
-
- emit_pack_store_4ub(p, dest, temp);
- j++; /* NOTE: two attrs consumed */
- }
- else {
- debug_printf("Can't emit 3ub\n");
- }
- return FALSE; /* add this later */
- break;
-
- case DRAW_EMIT_4UB_4F_RGBA:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_BGRA:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_ARGB:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_ABGR:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- default:
- debug_printf("unknown a[%d].format %d\n", j, a->format);
- return FALSE; /* catch any new opcodes */
- }
-
- /* Increment j by at least 1 - may have been incremented above also:
- */
- j++;
- }
-
- /* Next vertex:
- */
- x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride));
-
- /* decr count, loop if not zero
- */
- x86_dec(&p->func, countEBP);
- x86_test(&p->func, countEBP, countEBP);
- x86_jcc(&p->func, cc_NZ, label);
-
- /* Exit mmx state?
- */
- if (p->func.need_emms)
- mmx_emms(&p->func);
-
- /* Land forward jump here:
- */
- x86_fixup_fwd_jump(&p->func, fixup);
-
- /* Pop regs and return
- */
- x86_pop(&p->func, x86_get_base_reg(vfESI));
- x86_pop(&p->func, countEBP);
- x86_ret(&p->func);
-
- vf->emit = (draw_vf_emit_func)x86_get_func(&p->func);
- return TRUE;
-}
-
-
-
-void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
-{
- struct x86_program p;
-
- if (!rtasm_cpu_has_sse()) {
- vf->codegen_emit = NULL;
- return;
- }
-
- memset(&p, 0, sizeof(p));
-
- p.vf = vf;
- p.inputs_safe = 0; /* for now */
- p.outputs_safe = 1; /* for now */
- p.have_sse2 = rtasm_cpu_has_sse2();
- p.identity = x86_make_reg(file_XMM, 6);
- p.chan0 = x86_make_reg(file_XMM, 7);
-
- x86_init_func(&p.func);
-
- if (build_vertex_emit(&p)) {
- draw_vf_register_fastpath( vf, TRUE );
- }
- else {
- /* Note the failure so that we don't keep trying to codegen an
- * impossible state:
- */
- draw_vf_register_fastpath( vf, FALSE );
- x86_release_func(&p.func);
- }
-}
-
-#else
-
-void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
-{
- /* Dummy version for when USE_SSE_ASM not defined */
-}
-
-#endif
diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vs.c
index 8572a6d40c3..03fe00a9510 100644
--- a/src/gallium/auxiliary/draw/draw_vertex_shader.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -37,49 +37,6 @@
#include "draw_context.h"
#include "draw_vs.h"
-/**
- * Run the vertex shader on all vertices in the vertex queue.
- * Called by the draw module when the vertx cache needs to be flushed.
- */
-void
-draw_vertex_shader_queue_flush(struct draw_context *draw)
-{
- struct draw_vertex_shader *shader = draw->vertex_shader;
- unsigned i;
-
- assert(draw->vs.queue_nr != 0);
-
- /* XXX: do this on statechange:
- */
- shader->prepare( shader, draw );
-
-// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr );
-
- /* run vertex shader on vertex cache entries, four per invokation */
- for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) {
- unsigned elts[MAX_SHADER_VERTICES];
- int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i);
- struct vertex_header *dests =
- draw_header_from_block(draw->vs.vertex_cache,
- MAX_VERTEX_ALLOCATION, i);
-
- for (j = 0; j < n; j++) {
- elts[j] = draw->vs.elts[i + j];
- }
-
- for ( ; j < MAX_SHADER_VERTICES; j++) {
- elts[j] = elts[0];
- }
-
- assert(n > 0);
- assert(n <= MAX_SHADER_VERTICES);
-
- shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION);
- }
-
- draw->vs.post_nr = draw->vs.queue_nr;
- draw->vs.queue_nr = 0;
-}
struct draw_vertex_shader *
@@ -95,10 +52,8 @@ draw_create_vertex_shader(struct draw_context *draw,
vs = draw_create_vs_exec( draw, shader );
}
}
- assert(vs);
-
- tgsi_scan_shader(shader->tokens, &vs->info);
+ assert(vs);
return vs;
}
@@ -113,9 +68,6 @@ draw_bind_vertex_shader(struct draw_context *draw,
{
draw->vertex_shader = dvs;
draw->num_vs_outputs = dvs->info.num_outputs;
-
- tgsi_exec_machine_init(&draw->machine);
-
dvs->prepare( dvs, draw );
}
else {
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 33ce1e335e0..f9772b83b85 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -35,10 +35,39 @@
#include "draw_private.h"
-struct draw_vertex_shader;
struct draw_context;
struct pipe_shader_state;
+/**
+ * Private version of the compiled vertex_shader
+ */
+struct draw_vertex_shader {
+
+ /* This member will disappear shortly:
+ */
+ struct pipe_shader_state state;
+
+ struct tgsi_shader_info info;
+
+ void (*prepare)( struct draw_vertex_shader *shader,
+ struct draw_context *draw );
+
+ /* Run the shader - this interface will get cleaned up in the
+ * future:
+ */
+ void (*run_linear)( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride );
+
+
+ void (*delete)( struct draw_vertex_shader * );
+};
+
+
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
@@ -52,32 +81,7 @@ draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
-/* Should be part of the generated shader:
- */
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0x0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
+#define MAX_TGSI_VERTICES 4
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 5c88c2e24e3..54a2b2ab040 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -40,145 +40,118 @@
#include "tgsi/util/tgsi_parse.h"
-#define MAX_TGSI_VERTICES 4
+struct exec_vertex_shader {
+ struct draw_vertex_shader base;
+ struct tgsi_exec_machine *machine;
+};
+
+static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs )
+{
+ return (struct exec_vertex_shader *)vs;
+}
+
+
+/* Not required for run_linear.
+ */
static void
vs_exec_prepare( struct draw_vertex_shader *shader,
struct draw_context *draw )
{
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+
/* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(&draw->machine,
+ tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
- draw_update_vertex_fetch( draw );
}
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
+
+
+/* Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
*/
-static boolean
-vs_exec_run( struct draw_vertex_shader *shader,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- void *vOut,
- unsigned vertex_size)
+static void
+vs_exec_run_linear( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+ struct tgsi_exec_machine *machine = evs->machine;
unsigned int i, j;
- unsigned int clipped = 0;
+ unsigned slot;
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(draw->vertex_shader->info.output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- machine->Consts = (float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
- }
- else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
- }
+ machine->Consts = constants;
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
- draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices );
-
- if (!draw->rasterizer->bypass_vs) {
- /* run interpreter */
- tgsi_exec_machine_run( machine );
- }
- /* store machine results */
+ /* Swizzle inputs.
+ */
for (j = 0; j < max_vertices; j++) {
- unsigned slot;
- float x, y, z, w;
- struct vertex_header *out =
- draw_header_from_block(vOut, vertex_size, i + j);
-
- /* Handle attr[0] (position) specially:
- *
- * XXX: Computing the clipmask should be done in the vertex
- * program as a set of DP4 instructions appended to the
- * user-provided code.
- */
- x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- out->clipmask = compute_clipmask(out->clip, draw->plane,
- draw->nr_planes);
- clipped += out->clipmask;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
+#if 0
+ debug_printf("%d) Input vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ input[slot][0],
+ input[slot][1],
+ input[slot][2],
+ input[slot][3]);
}
- else {
- out->clipmask = 0;
- }
- out->edgeflag = 1;
- out->vertex_id = UNDEFINED_VERTEX_ID;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- out->data[0][0] = x * scale[0] + trans[0];
- out->data[0][1] = y * scale[1] + trans[1];
- out->data[0][2] = z * scale[2] + trans[2];
- out->data[0][3] = w;
- }
- else {
- out->data[0][0] = x;
- out->data[0][1] = y;
- out->data[0][2] = z;
- out->data[0][3] = w;
+#endif
+
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
+
+ /* run interpreter */
+ tgsi_exec_machine_run( machine );
+
+ /* Unswizzle all output results.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+
}
-#if 0 /*DEBUG*/
- printf("%d) Post xform vert:\n", i + j);
- for (slot = 0; slot < draw->num_vs_outputs; slot++) {
- printf("\t%d: %f %f %f %f\n", slot,
- out->data[slot][0],
- out->data[slot][1],
- out->data[slot][2],
- out->data[slot][3]);
+#if 0
+ debug_printf("%d) Post xform vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ output[slot][0],
+ output[slot][1],
+ output[slot][2],
+ output[slot][3]);
}
#endif
- } /* loop over vertices */
+
+ output = (float (*)[4])((char *)output + output_stride);
+ }
+
}
- return clipped != 0;
}
+
static void
vs_exec_delete( struct draw_vertex_shader *dvs )
{
@@ -191,17 +164,22 @@ struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *state)
{
- struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader );
+ struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
uint nt = tgsi_num_tokens(state->tokens);
if (vs == NULL)
return NULL;
/* we make a private copy of the tokens */
- vs->state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0]));
- vs->prepare = vs_exec_prepare;
- vs->run = vs_exec_run;
- vs->delete = vs_exec_delete;
+ vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0]));
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+
+
+ vs->base.prepare = vs_exec_prepare;
+ vs->base.run_linear = vs_exec_run_linear;
+ vs->base.delete = vs_exec_delete;
+ vs->machine = &draw->machine;
+
- return vs;
+ return &vs->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 73076d24674..dcada665143 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -47,6 +47,7 @@
struct draw_llvm_vertex_shader {
struct draw_vertex_shader base;
struct gallivm_prog *llvm_prog;
+ struct tgsi_exec_machine *machine;
};
@@ -54,121 +55,68 @@ static void
vs_llvm_prepare( struct draw_vertex_shader *base,
struct draw_context *draw )
{
- draw_update_vertex_fetch( draw );
}
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
- */
-static boolean
-vs_llvm_run( struct draw_vertex_shader *base,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- void *vOut )
+
+static void
+vs_llvm_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
- unsigned int j;
- unsigned int clipped = 0;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
-
- assert(count <= 4);
- assert(draw->vertex_shader->state->output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned int i, j;
+ unsigned slot;
- /* Consts does not require 16 byte alignment. */
- machine->Consts = (float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
- }
- else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
- }
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+ /* Swizzle inputs.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < base->info.num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
+ }
- draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
- if (!draw->rasterizer->bypass_vs) {
/* run shader */
gallivm_cpu_vs_exec(shader->llvm_prog,
machine->Inputs,
machine->Outputs,
- machine->Consts,
+ (float (*)[4]) constants,
machine->Temps);
- }
- /* store machine results */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
-
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane,
- draw->nr_planes);
- clipped += vOut[j]->clipmask;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
- }
- else {
- vOut[j]->clipmask = 0;
- }
- vOut[j]->edgeflag = 1;
- vOut[j]->vertex_id = UNDEFINED_VERTEX_ID;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
- }
- else {
- vOut[j]->data[0][0] = x;
- vOut[j]->data[0][1] = y;
- vOut[j]->data[0][2] = z;
- vOut[j]->data[0][3] = w;
- }
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
+ /* Unswizzle all output results
*/
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < base->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ }
+ output = (float (*)[4])((char *)output + output_stride);
}
- } /* loop over vertices */
- return clipped != 0;
+ }
}
+
+
static void
vs_llvm_delete( struct draw_vertex_shader *base )
{
@@ -198,15 +146,19 @@ draw_create_vs_llvm(struct draw_context *draw,
/* we make a private copy of the tokens */
vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0]));
+
+ tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
+
vs->base.prepare = vs_llvm_prepare;
- vs->base.run = vs_llvm_run;
+ vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
+ vs->machine = &draw->machine;
{
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
gallivm_ir_set_layout(ir, GALLIVM_SOA);
gallivm_ir_set_components(ir, 4);
- gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens);
+ gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens);
vs->llvm_prog = gallivm_ir_compile(ir);
gallivm_ir_delete(ir);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index ee0a3105b99..b1e9f671147 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -41,6 +41,7 @@
#include "draw_private.h"
#include "draw_context.h"
+#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
#include "tgsi/exec/tgsi_sse2.h"
#include "tgsi/util/tgsi_parse.h"
@@ -58,7 +59,11 @@ typedef void (XSTDCALL *codegen_function) (
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
struct x86_function sse2_program;
+
codegen_function func;
+
+ struct tgsi_exec_machine *machine;
+
float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
@@ -67,140 +72,71 @@ static void
vs_sse_prepare( struct draw_vertex_shader *base,
struct draw_context *draw )
{
- draw_update_vertex_fetch( draw );
}
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
+
+
+/* Simplified vertex shader interface for the pt paths. Given the
+ * complexity of code-generating all the above operations together,
+ * it's time to try doing all the other stuff separately.
*/
-static boolean
-vs_sse_run( struct draw_vertex_shader *base,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- void *vOut,
- unsigned vertex_size )
+static void
+vs_sse_run_linear( struct draw_vertex_shader *base,
+ const float (*input)[4],
+ float (*output)[4],
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
{
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct tgsi_exec_machine *machine = shader->machine;
unsigned int i, j;
- unsigned int clipped = 0;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(draw->vertex_shader->info.output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- /* Consts does not require 16 byte alignment. */
- machine->Consts = (float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
- }
- else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
- }
+ unsigned slot;
- for (i = 0; i < count; i += SSE_MAX_VERTICES) {
- unsigned int max_vertices = MIN2(SSE_MAX_VERTICES, count - i);
- /* Fetch vertices. This may at some point be integrated into the
- * compiled shader -- that would require a reorganization where
- * multiple versions of the compiled shader might exist,
- * specialized for each fetch state.
- */
- draw->vertex_fetch.fetch_func(draw, machine, &elts[i], max_vertices);
-
- if (!draw->rasterizer->bypass_vs) {
- /* run compiled shader
- */
- shader->func(machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps,
- shader->immediates);
- }
-
- /* XXX: Computing the clipmask and emitting results should be done
- * in the vertex program as a set of instructions appended to
- * the user-provided code.
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+
+ /* Swizzle inputs.
*/
for (j = 0; j < max_vertices; j++) {
- unsigned slot;
- float x, y, z, w;
- struct vertex_header *out =
- draw_header_from_block(vOut, vertex_size, i + j);
-
- x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- out->clipmask = compute_clipmask(out->clip, draw->plane,
- draw->nr_planes);
- clipped += out->clipmask;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
- }
- else {
- out->clipmask = 0;
- }
- out->edgeflag = 1;
- out->vertex_id = UNDEFINED_VERTEX_ID;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- out->data[0][0] = x * scale[0] + trans[0];
- out->data[0][1] = y * scale[1] + trans[1];
- out->data[0][2] = z * scale[2] + trans[2];
- out->data[0][3] = w;
- }
- else {
- out->data[0][0] = x;
- out->data[0][1] = y;
- out->data[0][2] = z;
- out->data[0][3] = w;
+ for (slot = 0; slot < base->info.num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
-#if 0 /*DEBUG*/
- printf("%d) Post xform vert:\n", i + j);
- for (slot = 0; slot < draw->num_vs_outputs; slot++) {
- printf("\t%d: %f %f %f %f\n", slot,
- out->data[slot][0],
- out->data[slot][1],
- out->data[slot][2],
- out->data[slot][3]);
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
+
+ /* run compiled shader
+ */
+ shader->func(machine->Inputs,
+ machine->Outputs,
+ (float (*)[4])constants,
+ machine->Temps,
+ shader->immediates);
+
+
+ /* Unswizzle all output results.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < base->info.num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
-#endif
- }
+
+ output = (float (*)[4])((char *)output + output_stride);
+ }
}
- return clipped != 0;
}
+
static void
vs_sse_delete( struct draw_vertex_shader *base )
{
@@ -220,7 +156,7 @@ draw_create_vs_sse(struct draw_context *draw,
struct draw_sse_vertex_shader *vs;
uint nt = tgsi_num_tokens(templ->tokens);
- if (!draw->use_sse)
+ if (!rtasm_cpu_has_sse2())
return NULL;
vs = CALLOC_STRUCT( draw_sse_vertex_shader );
@@ -229,9 +165,13 @@ draw_create_vs_sse(struct draw_context *draw,
/* we make a private copy of the tokens */
vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0]));
+
+ tgsi_scan_shader(templ->tokens, &vs->base.info);
+
vs->base.prepare = vs_sse_prepare;
- vs->base.run = vs_sse_run;
+ vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
+ vs->machine = &draw->machine;
x86_init_func( &vs->sse2_program );
diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c
deleted file mode 100644
index d6bff110b40..00000000000
--- a/src/gallium/auxiliary/draw/draw_wide_prims.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <[email protected]>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
-
-struct wide_stage {
- struct draw_stage stage;
-
- float half_line_width;
- float half_point_size;
-
- uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
- uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS];
- uint num_texcoords;
-
- int psize_slot;
-};
-
-
-
-static INLINE struct wide_stage *wide_stage( struct draw_stage *stage )
-{
- return (struct wide_stage *)stage;
-}
-
-
-static void passthrough_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-static void passthrough_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line(stage->next, header);
-}
-
-static void passthrough_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
-
-
-/**
- * Draw a wide line by drawing a quad (two triangles).
- * XXX need to disable polygon stipple.
- */
-static void wide_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- const struct wide_stage *wide = wide_stage(stage);
- const float half_width = wide->half_line_width;
-
- struct prim_header tri;
-
- struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
- struct vertex_header *v2 = dup_vert(stage, header->v[1], 2);
- struct vertex_header *v3 = dup_vert(stage, header->v[1], 3);
-
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
-
- const float dx = FABSF(pos0[0] - pos2[0]);
- const float dy = FABSF(pos0[1] - pos2[1]);
-
- /*
- * Draw wide line as a quad (two tris) by "stretching" the line along
- * X or Y.
- * We need to tweak coords in several ways to be conformant here.
- */
-
- if (dx > dy) {
- /* x-major line */
- pos0[1] = pos0[1] - half_width - 0.25f;
- pos1[1] = pos1[1] + half_width - 0.25f;
- pos2[1] = pos2[1] - half_width - 0.25f;
- pos3[1] = pos3[1] + half_width - 0.25f;
- if (pos0[0] < pos2[0]) {
- /* left to right line */
- pos0[0] -= 0.5f;
- pos1[0] -= 0.5f;
- pos2[0] -= 0.5f;
- pos3[0] -= 0.5f;
- }
- else {
- /* right to left line */
- pos0[0] += 0.5f;
- pos1[0] += 0.5f;
- pos2[0] += 0.5f;
- pos3[0] += 0.5f;
- }
- }
- else {
- /* y-major line */
- pos0[0] = pos0[0] - half_width + 0.25f;
- pos1[0] = pos1[0] + half_width + 0.25f;
- pos2[0] = pos2[0] - half_width + 0.25f;
- pos3[0] = pos3[0] + half_width + 0.25f;
- if (pos0[1] < pos2[1]) {
- /* top to bottom line */
- pos0[1] -= 0.5f;
- pos1[1] -= 0.5f;
- pos2[1] -= 0.5f;
- pos3[1] -= 0.5f;
- }
- else {
- /* bottom to top line */
- pos0[1] += 0.5f;
- pos1[1] += 0.5f;
- pos2[1] += 0.5f;
- pos3[1] += 0.5f;
- }
- }
-
- tri.det = header->det; /* only the sign matters */
- tri.v[0] = v0;
- tri.v[1] = v2;
- tri.v[2] = v3;
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v0;
- tri.v[1] = v3;
- tri.v[2] = v1;
- stage->next->tri( stage->next, &tri );
-}
-
-
-/**
- * Set the vertex texcoords for sprite mode.
- * Coords may be left untouched or set to a right-side-up or upside-down
- * orientation.
- */
-static void set_texcoords(const struct wide_stage *wide,
- struct vertex_header *v, const float tc[4])
-{
- uint i;
- for (i = 0; i < wide->num_texcoords; i++) {
- if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) {
- uint j = wide->texcoord_slot[i];
- v->data[j][0] = tc[0];
- if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT)
- v->data[j][1] = 1.0f - tc[1];
- else
- v->data[j][1] = tc[1];
- v->data[j][2] = tc[2];
- v->data[j][3] = tc[3];
- }
- }
-}
-
-
-/* If there are lots of sprite points (and why wouldn't there be?) it
- * would probably be more sensible to change hardware setup to
- * optimize this rather than doing the whole thing in software like
- * this.
- */
-static void wide_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- const struct wide_stage *wide = wide_stage(stage);
- const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
- float half_size;
- float left_adj, right_adj;
-
- struct prim_header tri;
-
- /* four dups of original vertex */
- struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
- struct vertex_header *v2 = dup_vert(stage, header->v[0], 2);
- struct vertex_header *v3 = dup_vert(stage, header->v[0], 3);
-
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
-
- /* point size is either per-vertex or fixed size */
- if (wide->psize_slot >= 0) {
- half_size = 0.5f * header->v[0]->data[wide->psize_slot][0];
- }
- else {
- half_size = wide->half_point_size;
- }
-
- left_adj = -half_size; /* + 0.25f;*/
- right_adj = half_size; /* + 0.25f;*/
-
- pos0[0] += left_adj;
- pos0[1] -= half_size;
-
- pos1[0] += left_adj;
- pos1[1] += half_size;
-
- pos2[0] += right_adj;
- pos2[1] -= half_size;
-
- pos3[0] += right_adj;
- pos3[1] += half_size;
-
- if (sprite) {
- static const float tex00[4] = { 0, 0, 0, 1 };
- static const float tex01[4] = { 0, 1, 0, 1 };
- static const float tex11[4] = { 1, 1, 0, 1 };
- static const float tex10[4] = { 1, 0, 0, 1 };
- set_texcoords( wide, v0, tex00 );
- set_texcoords( wide, v1, tex01 );
- set_texcoords( wide, v2, tex10 );
- set_texcoords( wide, v3, tex11 );
- }
-
- tri.det = header->det; /* only the sign matters */
- tri.v[0] = v0;
- tri.v[1] = v2;
- tri.v[2] = v3;
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v0;
- tri.v[1] = v3;
- tri.v[2] = v1;
- stage->next->tri( stage->next, &tri );
-}
-
-
-static void wide_first_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct wide_stage *wide = wide_stage(stage);
- struct draw_context *draw = stage->draw;
-
- wide->half_point_size = 0.5f * draw->rasterizer->point_size;
-
- /* XXX we won't know the real size if it's computed by the vertex shader! */
- if (draw->rasterizer->point_size > draw->wide_point_threshold) {
- stage->point = wide_point;
- }
- else {
- stage->point = passthrough_point;
- }
-
- if (draw->rasterizer->point_sprite) {
- /* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
- uint i, j = 0;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- wide->texcoord_slot[j] = i;
- wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j];
- j++;
- }
- }
- wide->num_texcoords = j;
- }
-
- wide->psize_slot = -1;
-
- if (draw->rasterizer->point_size_per_vertex) {
- /* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
- uint i;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
- wide->psize_slot = i;
- break;
- }
- }
- }
-
- stage->point( stage, header );
-}
-
-
-
-static void wide_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct wide_stage *wide = wide_stage(stage);
- struct draw_context *draw = stage->draw;
-
- wide->half_line_width = 0.5f * draw->rasterizer->line_width;
-
- if (draw->rasterizer->line_width != 1.0) {
- wide->stage.line = wide_line;
- }
- else {
- wide->stage.line = passthrough_line;
- }
-
- stage->line( stage, header );
-}
-
-
-static void wide_flush( struct draw_stage *stage, unsigned flags )
-{
- stage->line = wide_first_line;
- stage->point = wide_first_point;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void wide_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void wide_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-struct draw_stage *draw_wide_stage( struct draw_context *draw )
-{
- struct wide_stage *wide = CALLOC_STRUCT(wide_stage);
-
- draw_alloc_temp_verts( &wide->stage, 4 );
-
- wide->stage.draw = draw;
- wide->stage.next = NULL;
- wide->stage.point = wide_first_point;
- wide->stage.line = wide_first_line;
- wide->stage.tri = passthrough_tri;
- wide->stage.flush = wide_flush;
- wide->stage.reset_stipple_counter = wide_reset_stipple_counter;
- wide->stage.destroy = wide_destroy;
-
- return &wide->stage;
-}
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
index c24e19e062d..c3f7bfba93b 100644
--- a/src/gallium/auxiliary/gallivm/Makefile
+++ b/src/gallium/auxiliary/gallivm/Makefile
@@ -65,10 +65,14 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
gallivm_builtins.cpp: llvm_builtins.c
- clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
+ (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ rm temp1.bin
gallivmsoabuiltins.cpp: soabuiltins.c
- clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
+ (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@
+ rm temp2.bin
# Emacs tags
tags:
diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
index 1796f0a1772..a6f8cd043bc 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
@@ -1,567 +1,141 @@
-// Generated by llvm2cpp - DO NOT MODIFY!
-
-
-Module* createGallivmBuiltins(Module *mod) {
-
-mod->setModuleIdentifier("shader");
-
-// Type Definitions
-ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25);
-
-PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0);
-
-std::vector<const Type*>FuncTy_2_args;
-FuncTy_2_args.push_back(Type::FloatTy);
-FuncTy_2_args.push_back(Type::FloatTy);
-FunctionType* FuncTy_2 = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/FuncTy_2_args,
- /*isVarArg=*/false);
-
-PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0);
-
-VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4);
-
-std::vector<const Type*>FuncTy_5_args;
-FuncTy_5_args.push_back(VectorTy_4);
-FunctionType* FuncTy_5 = FunctionType::get(
- /*Result=*/VectorTy_4,
- /*Params=*/FuncTy_5_args,
- /*isVarArg=*/false);
-
-std::vector<const Type*>FuncTy_6_args;
-FuncTy_6_args.push_back(VectorTy_4);
-FuncTy_6_args.push_back(VectorTy_4);
-FuncTy_6_args.push_back(VectorTy_4);
-FunctionType* FuncTy_6 = FunctionType::get(
- /*Result=*/VectorTy_4,
- /*Params=*/FuncTy_6_args,
- /*isVarArg=*/false);
-
-VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4);
-
-std::vector<const Type*>FuncTy_9_args;
-FunctionType* FuncTy_9 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/FuncTy_9_args,
- /*isVarArg=*/true);
-
-PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0);
-
-PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0);
-
-std::vector<const Type*>FuncTy_12_args;
-FuncTy_12_args.push_back(Type::FloatTy);
-FunctionType* FuncTy_12 = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/FuncTy_12_args,
- /*isVarArg=*/false);
-
-PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0);
-
-std::vector<const Type*>FuncTy_13_args;
-FuncTy_13_args.push_back(VectorTy_4);
-FunctionType* FuncTy_13 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/FuncTy_13_args,
- /*isVarArg=*/false);
-
-
-// Function Declarations
-
-Function* func_approx = new Function(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"approx", mod);
-func_approx->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_approx_PAL = 0;
-func_approx->setParamAttrs(func_approx_PAL);
-
-Function* func_powf = new Function(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"powf", mod); // (external, no body)
-func_powf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_powf_PAL = 0;
-func_powf->setParamAttrs(func_powf_PAL);
-
-Function* func_lit = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"lit", mod);
-func_lit->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_lit_PAL = 0;
-func_lit->setParamAttrs(func_lit_PAL);
-
-Function* func_cmp = new Function(
- /*Type=*/FuncTy_6,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"cmp", mod);
-func_cmp->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_cmp_PAL = 0;
-{
- ParamAttrsVector Attrs;
- ParamAttrsWithIndex PAWI;
- PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
- Attrs.push_back(PAWI);
- func_cmp_PAL = ParamAttrsList::get(Attrs);
-
-}
-func_cmp->setParamAttrs(func_cmp_PAL);
-
-Function* func_vcos = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"vcos", mod);
-func_vcos->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_vcos_PAL = 0;
-func_vcos->setParamAttrs(func_vcos_PAL);
-
-Function* func_printf = new Function(
- /*Type=*/FuncTy_9,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"printf", mod); // (external, no body)
-func_printf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_printf_PAL = 0;
-func_printf->setParamAttrs(func_printf_PAL);
-
-Function* func_cosf = new Function(
- /*Type=*/FuncTy_12,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"cosf", mod); // (external, no body)
-func_cosf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_cosf_PAL = 0;
-func_cosf->setParamAttrs(func_cosf_PAL);
-
-Function* func_scs = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"scs", mod);
-func_scs->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_scs_PAL = 0;
-func_scs->setParamAttrs(func_scs_PAL);
-
-Function* func_sinf = new Function(
- /*Type=*/FuncTy_12,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"sinf", mod); // (external, no body)
-func_sinf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_sinf_PAL = 0;
-func_sinf->setParamAttrs(func_sinf_PAL);
-
-Function* func_vsin = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"vsin", mod);
-func_vsin->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_vsin_PAL = 0;
-func_vsin->setParamAttrs(func_vsin_PAL);
-
-Function* func_kilp = new Function(
- /*Type=*/FuncTy_13,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"kilp", mod);
-func_kilp->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_kilp_PAL = 0;
-{
- ParamAttrsVector Attrs;
- ParamAttrsWithIndex PAWI;
- PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
- Attrs.push_back(PAWI);
- func_kilp_PAL = ParamAttrsList::get(Attrs);
-
-}
-func_kilp->setParamAttrs(func_kilp_PAL);
-
-// Global Variable Declarations
-
-
-GlobalVariable* gvar_array__str = new GlobalVariable(
-/*Type=*/ArrayTy_0,
-/*isConstant=*/true,
-/*Linkage=*/GlobalValue::InternalLinkage,
-/*Initializer=*/0, // has initializer, specified below
-/*Name=*/".str",
-mod);
-
-GlobalVariable* gvar_array__str1 = new GlobalVariable(
-/*Type=*/ArrayTy_0,
-/*isConstant=*/true,
-/*Linkage=*/GlobalValue::InternalLinkage,
-/*Initializer=*/0, // has initializer, specified below
-/*Name=*/".str1",
-mod);
-
-// Constant Definitions
-Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true);
-Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true);
-ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
-ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
-Constant* const_float_18 = Constant::getNullValue(Type::FloatTy);
-Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32));
-std::vector<Constant*> const_packed_20_elems;
-ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
-const_packed_20_elems.push_back(const_float_21);
-UndefValue* const_float_22 = UndefValue::get(Type::FloatTy);
-const_packed_20_elems.push_back(const_float_22);
-const_packed_20_elems.push_back(const_float_22);
-const_packed_20_elems.push_back(const_float_21);
-Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems);
-ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10));
-ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10));
-ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10));
-std::vector<Constant*> const_packed_26_elems;
-const_packed_26_elems.push_back(const_float_21);
-const_packed_26_elems.push_back(const_float_18);
-const_packed_26_elems.push_back(const_float_18);
-const_packed_26_elems.push_back(const_float_21);
-Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems);
-Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy);
-std::vector<Constant*> const_packed_28_elems;
-const_packed_28_elems.push_back(const_int32_19);
-ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10));
-const_packed_28_elems.push_back(const_int32_29);
-const_packed_28_elems.push_back(const_int32_25);
-const_packed_28_elems.push_back(const_int32_24);
-Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems);
-std::vector<Constant*> const_packed_30_elems;
-const_packed_30_elems.push_back(const_int32_19);
-const_packed_30_elems.push_back(const_int32_23);
-ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10));
-const_packed_30_elems.push_back(const_int32_31);
-const_packed_30_elems.push_back(const_int32_24);
-Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems);
-std::vector<Constant*> const_packed_32_elems;
-const_packed_32_elems.push_back(const_int32_19);
-const_packed_32_elems.push_back(const_int32_23);
-const_packed_32_elems.push_back(const_int32_25);
-ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10));
-const_packed_32_elems.push_back(const_int32_33);
-Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems);
-std::vector<Constant*> const_ptr_34_indices;
-const_ptr_34_indices.push_back(const_int32_19);
-const_ptr_34_indices.push_back(const_int32_19);
-Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() );
-UndefValue* const_packed_35 = UndefValue::get(VectorTy_4);
-std::vector<Constant*> const_ptr_36_indices;
-const_ptr_36_indices.push_back(const_int32_19);
-const_ptr_36_indices.push_back(const_int32_19);
-Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() );
-
-// Global Variable Definitions
-gvar_array__str->setInitializer(const_array_14);
-gvar_array__str1->setInitializer(const_array_15);
-
-// Function Definitions
-
-// Function: approx (func_approx)
-{
- Function::arg_iterator args = func_approx->arg_begin();
- Value* float_a = args++;
- float_a->setName("a");
- Value* float_b = args++;
- float_b->setName("b");
-
- BasicBlock* label_entry = new BasicBlock("entry",func_approx,0);
-
- // Block entry (label_entry)
- FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry);
- SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry);
- FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry);
- SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry);
- FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry);
- SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry);
- std::vector<Value*> float_call_params;
- float_call_params.push_back(float_a_addr_0);
- float_call_params.push_back(float_b_addr_1);
- CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry);
- float_call->setCallingConv(CallingConv::C);
- float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0;
- float_call->setParamAttrs(float_call_PAL);
-
- new ReturnInst(float_call, label_entry);
-
-}
-
-// Function: lit (func_lit)
-{
- Function::arg_iterator args = func_lit->arg_begin();
- Value* packed_tmp = args++;
- packed_tmp->setName("tmp");
-
- BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0);
- BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0);
- BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0);
-
- // Block entry (label_entry_38)
- ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38);
- FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38);
- new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38);
-
- // Block ifthen (label_ifthen)
- InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen);
- ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen);
- ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen);
- std::vector<Value*> float_call_41_params;
- float_call_41_params.push_back(float_tmp12);
- float_call_41_params.push_back(float_tmp14);
- CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen);
- float_call_41->setCallingConv(CallingConv::C);
- float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0;
- float_call_41->setParamAttrs(float_call_41_PAL);
-
- InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen);
- new ReturnInst(packed_tmp16, label_ifthen);
-
- // Block UnifiedReturnBlock (label_UnifiedReturnBlock)
- new ReturnInst(const_packed_26, label_UnifiedReturnBlock);
-
-}
-
-// Function: cmp (func_cmp)
-{
- Function::arg_iterator args = func_cmp->arg_begin();
- Value* packed_tmp0 = args++;
- packed_tmp0->setName("tmp0");
- Value* packed_tmp1 = args++;
- packed_tmp1->setName("tmp1");
- Value* packed_tmp2 = args++;
- packed_tmp2->setName("tmp2");
-
- BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0);
- BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0);
- BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0);
- BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0);
- BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0);
- BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0);
- BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0);
-
- // Block entry (label_entry_44)
- ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44);
- CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44);
- FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44);
- ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44);
- CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44);
- FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44);
- SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44);
- new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44);
-
- // Block cond.?14 (label_cond__14)
- ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14);
- ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14);
- CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14);
- FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14);
- new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14);
-
- // Block cond.cont20 (label_cond_cont20)
- ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20);
- ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20);
- CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20);
- FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20);
- new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20);
-
- // Block cond.?28 (label_cond__28)
- PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28);
- packed_tmp23_reg2mem_0->reserveOperandSpace(2);
- packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14);
- packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20);
-
- ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28);
- ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28);
- CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28);
- FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28);
- new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28);
-
- // Block cond.cont34 (label_cond_cont34)
- PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34);
- packed_tmp23_reg2mem_1->reserveOperandSpace(2);
- packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14);
- packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20);
-
- ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34);
- ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34);
- CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34);
- FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34);
- new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34);
-
- // Block cond.?42 (label_cond__42)
- PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42);
- packed_tmp37_reg2mem_0->reserveOperandSpace(2);
- packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28);
- packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34);
-
- ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42);
- new ReturnInst(packed_tmp5113, label_cond__42);
-
- // Block cond.cont48 (label_cond_cont48)
- PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48);
- packed_tmp37_reg2mem_1->reserveOperandSpace(2);
- packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28);
- packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34);
-
- ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48);
- new ReturnInst(packed_tmp51, label_cond_cont48);
-
-}
-
-// Function: vcos (func_vcos)
-{
- Function::arg_iterator args = func_vcos->arg_begin();
- Value* packed_val = args++;
- packed_val->setName("val");
-
- BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0);
-
- // Block entry (label_entry_53)
- ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53);
- CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53);
- ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53);
- CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53);
- ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53);
- CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53);
- ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53);
- CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53);
- std::vector<Value*> int32_call_params;
- int32_call_params.push_back(const_ptr_34);
- int32_call_params.push_back(double_conv_54);
- int32_call_params.push_back(double_conv4);
- int32_call_params.push_back(double_conv7);
- int32_call_params.push_back(double_conv10);
- CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53);
- int32_call->setCallingConv(CallingConv::C);
- int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0;
- int32_call->setParamAttrs(int32_call_PAL);
-
- CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53);
- float_call13->setCallingConv(CallingConv::C);
- float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0;
- float_call13->setParamAttrs(float_call13_PAL);
-
- InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53);
- CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53);
- float_call18->setCallingConv(CallingConv::C);
- float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0;
- float_call18->setParamAttrs(float_call18_PAL);
-
- InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53);
- CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53);
- float_call23->setCallingConv(CallingConv::C);
- float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0;
- float_call23->setParamAttrs(float_call23_PAL);
-
- InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53);
- CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53);
- float_call28->setCallingConv(CallingConv::C);
- float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0;
- float_call28->setParamAttrs(float_call28_PAL);
-
- InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53);
- CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53);
- CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53);
- CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53);
- CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53);
- std::vector<Value*> int32_call43_params;
- int32_call43_params.push_back(const_ptr_36);
- int32_call43_params.push_back(double_conv33);
- int32_call43_params.push_back(double_conv36);
- int32_call43_params.push_back(double_conv39);
- int32_call43_params.push_back(double_conv42);
- CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53);
- int32_call43->setCallingConv(CallingConv::C);
- int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0;
- int32_call43->setParamAttrs(int32_call43_PAL);
-
- new ReturnInst(packed_tmp30, label_entry_53);
-
-}
-
-// Function: scs (func_scs)
-{
- Function::arg_iterator args = func_scs->arg_begin();
- Value* packed_val_58 = args++;
- packed_val_58->setName("val");
-
- BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0);
-
- // Block entry (label_entry_59)
- ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59);
- CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59);
- float_call_60->setCallingConv(CallingConv::C);
- float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0;
- float_call_60->setParamAttrs(float_call_60_PAL);
-
- InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59);
- CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59);
- float_call7->setCallingConv(CallingConv::C);
- float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0;
- float_call7->setParamAttrs(float_call7_PAL);
-
- InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59);
- new ReturnInst(packed_tmp9, label_entry_59);
-
-}
-
-// Function: vsin (func_vsin)
-{
- Function::arg_iterator args = func_vsin->arg_begin();
- Value* packed_val_62 = args++;
- packed_val_62->setName("val");
-
- BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0);
-
- // Block entry (label_entry_63)
- ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63);
- CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63);
- float_call_65->setCallingConv(CallingConv::C);
- float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0;
- float_call_65->setParamAttrs(float_call_65_PAL);
-
- InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63);
- InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63);
- InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63);
- InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63);
- new ReturnInst(packed_tmp15_67, label_entry_63);
-
-}
-
-// Function: kilp (func_kilp)
-{
- Function::arg_iterator args = func_kilp->arg_begin();
- Value* packed_val_69 = args++;
- packed_val_69->setName("val");
-
- BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0);
- BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0);
- BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0);
- BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0);
- BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0);
-
- // Block entry (label_entry_70)
- ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70);
- FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70);
-
- // Block lor_rhs (label_lor_rhs)
- ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs);
- FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs);
-
- // Block lor_rhs5 (label_lor_rhs5)
- ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5);
- FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5);
-
- // Block lor_rhs11 (label_lor_rhs11)
- ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11);
- FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11);
- CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11);
- new ReturnInst(int32_retval, label_lor_rhs11);
-
- // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71)
- new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71);
-
-}
-
-return mod;
-
-}
+static const unsigned char llvm_builtins_data[] = {
+0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x2b,0x02,0x00,0x00,0x01,0x10,0x00,0x00,
+0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39,
+0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02,
+0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24,
+0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64,
+0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c,
+0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x13,0x00,0x00,0x00,0x32,0x22,0x48,0x09,
+0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52,
+0x26,0x08,0xb0,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x21,0x80,0x39,0x82,
+0x60,0x0a,0x80,0x2e,0xd5,0x61,0x04,0x42,0x20,0x49,0x90,0x22,0x4d,0xa2,0x73,0x04,
+0x08,0xb9,0x32,0x00,0x00,0x8a,0x10,0xc2,0x65,0xb8,0x42,0x84,0x10,0x42,0x0d,0x44,
+0x11,0x00,0x18,0x01,0x28,0x82,0x08,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83,
+0x36,0x80,0x87,0x71,0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73,
+0x70,0x87,0x7a,0xd8,0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a,
+0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0,
+0x07,0x78,0xd0,0x06,0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,
+0x71,0x60,0x07,0x7a,0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73,
+0x20,0x07,0x7a,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40,
+0x07,0x7a,0x30,0x07,0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07,
+0x7a,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a,
+0x30,0x07,0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60,
+0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07,
+0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a,
+0x00,0x07,0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20,
+0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02,
+0x04,0x60,0x76,0xc6,0xfc,0x6c,0x48,0xa2,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49,
+0x14,0x20,0x00,0x00,0x00,0x00,0x80,0x21,0xc9,0x02,0x00,0x01,0x00,0x00,0x00,0x30,
+0x24,0x61,0x00,0x20,0x08,0x00,0x00,0x00,0x86,0x24,0x0b,0x00,0x04,0x00,0x00,0x00,
+0xc0,0x90,0xa4,0x01,0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,
+0x00,0x00,0x43,0x92,0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00,
+0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x89,0x01,0x00,
+0x41,0x00,0x00,0x00,0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,
+0x19,0x11,0x4c,0x90,0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x92,0x8a,0x59,0x8b,0x43,
+0x50,0xd2,0x09,0x02,0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,
+0x64,0x56,0x28,0x66,0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,
+0xa8,0x64,0x06,0x95,0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,
+0x04,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,
+0x05,0x00,0x00,0x00,0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,
+0x10,0x84,0xc3,0x60,0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e,
+0x34,0xc9,0x30,0x41,0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44,
+0x8d,0x35,0x56,0x01,0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,
+0x46,0x41,0x08,0xcc,0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b,
+0x35,0x04,0x80,0x39,0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81,
+0x04,0x3e,0x30,0x0c,0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00,
+0x05,0xd1,0x4c,0x11,0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x61,0x20,0x00,0x00,0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,
+0x08,0x00,0x00,0x00,0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,
+0x55,0xd6,0x1c,0x84,0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,
+0x41,0xfc,0x03,0x00,0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,
+0x73,0x0c,0xd3,0x15,0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,
+0x6d,0x73,0x0c,0x19,0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00,
+0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13,
+0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35,
+0x0c,0x13,0xf3,0x9c,0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c,
+0x8b,0x23,0x28,0x76,0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05,
+0xd1,0x4c,0x11,0x66,0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x61,0x20,0x00,0x00,0x82,0x00,0x00,0x00,0x13,0x04,0x47,0x2c,0x10,0x00,0x00,0x00,
+0x08,0x00,0x00,0x00,0x24,0x46,0x00,0x8a,0xa0,0x0c,0x4a,0xa0,0x14,0x8a,0xa1,0x1c,
+0x68,0x8c,0x00,0x10,0x9a,0x83,0x80,0xa8,0x48,0x9a,0x83,0x80,0xa6,0x4a,0x9a,0x83,
+0x80,0xa6,0xc8,0x02,0x63,0x08,0x0d,0x64,0xdb,0xc0,0x49,0x06,0xee,0x22,0xc6,0x10,
+0x9a,0xc9,0xbc,0x81,0x93,0x0c,0xdf,0x45,0x4c,0x31,0x38,0x4f,0x37,0xcb,0x10,0x08,
+0x60,0x30,0xc8,0x10,0x06,0x0e,0x36,0x86,0xd0,0x44,0x36,0x06,0x03,0x27,0x19,0xc8,
+0xe0,0x22,0x66,0x19,0x06,0xa2,0x0c,0x06,0x19,0xc2,0xe0,0xc1,0xc6,0x10,0x9a,0xc8,
+0xce,0x60,0xe0,0x24,0x03,0x1a,0x5c,0xc4,0x2c,0xc3,0x40,0xa4,0xc1,0x40,0x45,0x20,
+0x06,0x81,0x19,0x08,0x83,0x0c,0x6a,0xe0,0x64,0x63,0x08,0x8d,0x64,0x6c,0x30,0x70,
+0x92,0xa1,0x0d,0x2e,0x62,0x96,0xa1,0x30,0xdc,0x60,0xa0,0x22,0x10,0x83,0xc0,0x0c,
+0x84,0x41,0x86,0x37,0x78,0xb2,0x31,0x84,0x46,0xb2,0x38,0x18,0x38,0xc9,0x20,0x07,
+0x17,0x31,0xcb,0x50,0x18,0x73,0x30,0x50,0x11,0xac,0xc1,0x00,0x07,0xc4,0x20,0x03,
+0x1d,0x38,0x1a,0xd6,0xc1,0x40,0x45,0xb0,0x06,0x03,0x1c,0x10,0x83,0x0c,0x76,0xf0,
+0x68,0x78,0x07,0xe1,0x40,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x56,0x62,0x08,0xcc,
+0x63,0xef,0x3a,0xa9,0x00,0x19,0x7b,0x73,0x23,0x73,0xf9,0xa1,0x91,0x31,0x98,0x62,
+0x62,0x9e,0x7b,0xb7,0x06,0x62,0x62,0x1e,0xda,0x1c,0x88,0x89,0x79,0x6a,0x7b,0x20,
+0x26,0xe6,0xb1,0x6d,0x83,0x98,0x98,0xe7,0x36,0x92,0x43,0x70,0x9a,0xca,0xd6,0x73,
+0xa3,0x79,0x26,0xe6,0xb9,0x77,0x3f,0x22,0x0c,0x9b,0x21,0x18,0x9f,0xb6,0x90,0x64,
+0x62,0x9e,0xda,0x9f,0x98,0xc7,0x36,0x9b,0x67,0x62,0x9e,0x7b,0xf7,0x23,0xc2,0xb0,
+0x19,0x82,0xf1,0x6b,0x53,0x79,0x26,0xe6,0xb1,0x6f,0x3f,0x22,0x0c,0x9b,0x21,0x18,
+0x9f,0xb6,0x98,0x62,0x62,0x9e,0xbb,0xb7,0x97,0x67,0x62,0x1e,0xfb,0xf6,0x23,0xc2,
+0xb0,0x19,0x82,0xf1,0x6b,0x43,0x31,0x04,0xa7,0xa9,0x6c,0xdd,0x66,0x0a,0x81,0x79,
+0xf0,0xfa,0x08,0x16,0xc1,0x69,0x06,0x5f,0x70,0x9a,0xe9,0xc6,0x49,0x01,0xc8,0xd8,
+0x9b,0x1b,0x99,0xcb,0x4f,0x0c,0x8d,0xad,0x18,0x13,0xf3,0xdc,0x3b,0x6f,0x35,0xc7,
+0xc4,0x3c,0x79,0x5d,0xdf,0x06,0x52,0x08,0xcc,0x53,0xdf,0x26,0x62,0x4c,0xcc,0x63,
+0xdf,0xb7,0xb9,0x1c,0x02,0xf3,0xe0,0x75,0x5d,0x5b,0xc7,0x20,0x30,0x8f,0x79,0x14,
+0x13,0xf3,0xd4,0xf5,0x19,0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0x13,0xce,0x5b,0x0b,
+0x22,0x38,0x4d,0x85,0xd3,0x35,0x6d,0x37,0xc5,0xc4,0x3c,0x79,0x4d,0x1a,0x40,0xc6,
+0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x8c,0x83,0x10,0x9c,0xa6,0xb2,0x94,0x42,0x60,
+0x1e,0x7b,0x37,0x19,0x43,0x70,0x9a,0x0a,0xa7,0xcd,0xa4,0x98,0x98,0xc7,0xbe,0x8d,
+0xc5,0x98,0x98,0xe7,0xee,0x7b,0x3b,0x29,0x26,0xe6,0xb1,0xf3,0x13,0x58,0x04,0xa7,
+0x19,0x7c,0xc1,0x69,0x26,0x9b,0xb6,0x0f,0x43,0x70,0x9a,0xaa,0xb6,0x6d,0xc4,0x98,
+0x98,0xc7,0xce,0xf1,0x03,0x28,0x88,0x66,0x8a,0x30,0x00,0x00,0x00,0x00,0x00,0x00,
+0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,
+0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xbd,
+0x61,0x8c,0x04,0x10,0x1e,0xe1,0x19,0xc6,0x48,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,
+0x63,0x08,0xcd,0x63,0xd5,0xc0,0x31,0x84,0x06,0xb2,0x6b,0xe0,0x18,0x42,0x13,0x59,
+0x36,0x70,0x0c,0xa1,0x71,0x6c,0x1b,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,
+0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,
+0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,
+0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0,0x98,0x37,0xd0,0xc0,0x60,0xa0,0x89,0xc1,
+0x40,0x23,0x83,0x81,0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,
+0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,
+0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,
+0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,
+0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,
+0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,
+0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,
+0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,
+0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,
+0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,
+0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
+0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,
+0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,
+0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,
+0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,
+0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31,0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07,
+0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c,0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2,
+0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,
+0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,
+0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,
+0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,
+0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51,0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc,
+0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d,0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3,
+0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05,0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,
+0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,
+0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,
+0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,
+0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4,
+0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33,0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,
+0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,
+0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,
+0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52,0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb,
+0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec,0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54,
+0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48,
+0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43,
+0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00,
+0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,
+0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,
+0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,
+0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,
+0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c,0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25,
+0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00};
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
index 8919491792f..95a670edafc 100644
--- a/src/gallium/auxiliary/gallivm/instructions.cpp
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -35,6 +35,8 @@
#include "storage.h"
+#include "pipe/p_util.h"
+
#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
#include <llvm/DerivedTypes.h>
@@ -42,7 +44,8 @@
#include <llvm/InstrTypes.h>
#include <llvm/Instructions.h>
#include <llvm/ParameterAttributes.h>
-#include <llvm/ParamAttrsList.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
#include <sstream>
#include <fstream>
@@ -53,7 +56,6 @@ using namespace llvm;
#include "gallivm_builtins.cpp"
#if 0
-
llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals)
{
VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -84,7 +86,10 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB
m_llvmLit = 0;
m_fmtPtr = 0;
- createGallivmBuiltins(m_mod);
+ MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
+ (const char*)&llvm_builtins_data[0],
+ (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]);
+ m_mod = ParseBitcodeFile(buffer);
}
llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2)
@@ -134,12 +139,12 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> fsqrtArgs;
fsqrtArgs.push_back(Type::FloatTy);
- ParamAttrsList *fsqrtPal = 0;
+ PAListPtr fsqrtPal;
FunctionType* fsqrtType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/fsqrtArgs,
/*isVarArg=*/false);
- m_llvmFSqrt = new Function(
+ m_llvmFSqrt = Function::Create(
/*Type=*/fsqrtType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"llvm.sqrt.f32", m_mod);
@@ -191,12 +196,12 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> fabsArgs;
fabsArgs.push_back(Type::FloatTy);
- ParamAttrsList *fabsPal = 0;
+ PAListPtr fabsPal;
FunctionType* fabsType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/fabsArgs,
/*isVarArg=*/false);
- m_llvmFAbs = new Function(
+ m_llvmFAbs = Function::Create(
/*Type=*/fabsType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"fabs", m_mod);
@@ -234,12 +239,12 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2)
std::vector<const Type*> powArgs;
powArgs.push_back(Type::FloatTy);
powArgs.push_back(Type::FloatTy);
- ParamAttrsList *powPal = 0;
+ PAListPtr powPal;
FunctionType* powType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/powArgs,
/*isVarArg=*/false);
- m_llvmPow = new Function(
+ m_llvmPow = Function::Create(
/*Type=*/powType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"llvm.pow.f32", m_mod);
@@ -333,12 +338,12 @@ llvm::Value * Instructions::callFloor(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> floorArgs;
floorArgs.push_back(Type::FloatTy);
- ParamAttrsList *floorPal = 0;
+ PAListPtr floorPal;
FunctionType* floorType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/floorArgs,
/*isVarArg=*/false);
- m_llvmFloor = new Function(
+ m_llvmFloor = Function::Create(
/*Type=*/floorType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"floorf", m_mod);
@@ -376,12 +381,12 @@ llvm::Value * Instructions::callFLog(llvm::Value *val)
// predeclare the intrinsic
std::vector<const Type*> flogArgs;
flogArgs.push_back(Type::FloatTy);
- ParamAttrsList *flogPal = 0;
+ PAListPtr flogPal;
FunctionType* flogType = FunctionType::get(
/*Result=*/Type::FloatTy,
/*Params=*/flogArgs,
/*isVarArg=*/false);
- m_llvmFlog = new Function(
+ m_llvmFlog = Function::Create(
/*Type=*/flogType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"logf", m_mod);
@@ -504,12 +509,12 @@ void Instructions::printVector(llvm::Value *val)
llvm::Function * Instructions::declarePrintf()
{
std::vector<const Type*> args;
- ParamAttrsList *params = 0;
+ PAListPtr params;
FunctionType* funcTy = FunctionType::get(
/*Result=*/IntegerType::get(32),
/*Params=*/args,
/*isVarArg=*/true);
- Function* func_printf = new Function(
+ Function* func_printf = Function::Create(
/*Type=*/funcTy,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/"printf", m_mod);
@@ -633,8 +638,8 @@ llvm::Value * Instructions::abs(llvm::Value *in)
void Instructions::ifop(llvm::Value *in)
{
- BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0);
- BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0);
+ BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0);
+ BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0);
//BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0);
//BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0);
@@ -660,7 +665,7 @@ llvm::BasicBlock * Instructions::currentBlock() const
void Instructions::elseop()
{
assert(!m_ifStack.empty());
- BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0);
+ BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0);
m_builder.CreateBr(ifend);
m_builder.SetInsertPoint(m_ifStack.top());
currentBlock()->setName(name("ifelse"));
@@ -687,8 +692,8 @@ llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2,
void Instructions::beginLoop()
{
- BasicBlock *begin = new BasicBlock(name("loop"), m_func,0);
- BasicBlock *end = new BasicBlock(name("endloop"), m_func,0);
+ BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0);
+ BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0);
m_builder.CreateBr(begin);
Loop loop;
@@ -711,7 +716,7 @@ void Instructions::endLoop()
void Instructions::brk()
{
assert(!m_loopStack.empty());
- BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0);
+ BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0);
m_builder.CreateBr(m_loopStack.top().end);
m_builder.SetInsertPoint(unr);
}
@@ -760,13 +765,13 @@ llvm::Function * Instructions::declareFunc(int label)
args.push_back(vecPtr);
args.push_back(vecPtr);
args.push_back(vecPtr);
- ParamAttrsList *params = 0;
+ PAListPtr params;
FunctionType *funcType = FunctionType::get(
/*Result=*/Type::VoidTy,
/*Params=*/args,
/*isVarArg=*/false);
std::string name = createFuncName(label);
- Function *func = new Function(
+ Function *func = Function::Create(
/*Type=*/funcType,
/*Linkage=*/GlobalValue::ExternalLinkage,
/*Name=*/name.c_str(), m_mod);
@@ -784,7 +789,7 @@ void Instructions::bgnSub(unsigned label)
ptr_INPUT->setName("INPUT");
m_storage->pushArguments(ptr_INPUT);
- llvm::BasicBlock *entry = new BasicBlock("entry", func, 0);
+ llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0);
m_func = func;
m_builder.SetInsertPoint(entry);
diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h
index 9ebc17dd8ec..19ca84ddc68 100644
--- a/src/gallium/auxiliary/gallivm/instructions.h
+++ b/src/gallium/auxiliary/gallivm/instructions.h
@@ -36,7 +36,7 @@
#include <llvm/BasicBlock.h>
#include <llvm/Module.h>
#include <llvm/Value.h>
-#include <llvm/Support/LLVMBuilder.h>
+#include <llvm/Support/IRBuilder.h>
#include <map>
#include <stack>
@@ -125,7 +125,7 @@ private:
llvm::Module *m_mod;
llvm::Function *m_func;
char m_name[32];
- llvm::LLVMFoldingBuilder m_builder;
+ llvm::IRBuilder m_builder;
int m_idx;
llvm::VectorType *m_floatVecType;
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index 6f83b56a727..f0122802db9 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -1,8 +1,35 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
#include "instructionssoa.h"
#include "storagesoa.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_util.h"
#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
@@ -10,7 +37,11 @@
#include <llvm/Function.h>
#include <llvm/Instructions.h>
#include <llvm/Transforms/Utils/Cloning.h>
-#include <llvm/ParamAttrsList.h>
+#include <llvm/ParameterAttributes.h>
+//#include <llvm/ParamAttrsList.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
#include <iostream>
@@ -183,7 +214,10 @@ llvm::Module * InstructionsSoa::currentModule() const
void InstructionsSoa::createBuiltins()
{
- m_builtins = createSoaBuiltins();
+ MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
+ (const char*)&soabuiltins_data[0],
+ (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]);
+ m_builtins = ParseBitcodeFile(buffer);
createDependencies();
}
@@ -204,32 +238,32 @@ llvm::Value * InstructionsSoa::allocaTemp()
std::vector<Value*> indices;
indices.push_back(m_storage->constantInt(0));
indices.push_back(m_storage->constantInt(0));
- GetElementPtrInst *getElem = new GetElementPtrInst(alloca,
- indices.begin(),
- indices.end(),
- name("allocaPtr"),
- m_builder.GetInsertBlock());
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca,
+ indices.begin(),
+ indices.end(),
+ name("allocaPtr"),
+ m_builder.GetInsertBlock());
return getElem;
}
std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr)
{
- GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(0),
- name("xPtr"),
- m_builder.GetInsertBlock());
- GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(1),
- name("yPtr"),
- m_builder.GetInsertBlock());
- GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(2),
- name("zPtr"),
- m_builder.GetInsertBlock());
- GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr,
- m_storage->constantInt(3),
- name("wPtr"),
- m_builder.GetInsertBlock());
+ GetElementPtrInst *xElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(0),
+ name("xPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(1),
+ name("yPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(2),
+ name("zPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(3),
+ name("wPtr"),
+ m_builder.GetInsertBlock());
std::vector<llvm::Value*> res(4);
res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock());
@@ -355,10 +389,10 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
llvm::Function *func = 0;
if (originalFunc->isDeclaration()) {
std::cout << "function decleration" <<std::endl;
- func = new Function(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage,
- originalFunc->getName(), currentModule());
+ func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage,
+ originalFunc->getName(), currentModule());
func->setCallingConv(CallingConv::C);
- const ParamAttrsList *pal = 0;
+ const PAListPtr pal;
func->setParamAttrs(pal);
currentModule()->dump();
} else {
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
index b9104ea286d..060ee72f2e8 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.h
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h
@@ -29,7 +29,7 @@
#define INSTRUCTIONSSOA_H
#include <pipe/p_shader_tokens.h>
-#include <llvm/Support/LLVMBuilder.h>
+#include <llvm/Support/IRBuilder.h>
#include <map>
#include <vector>
@@ -87,7 +87,7 @@ private:
const std::vector<llvm::Value*> in3);
void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST);
private:
- llvm::LLVMFoldingBuilder m_builder;
+ llvm::IRBuilder m_builder;
StorageSoa *m_storage;
std::map<int, std::string> m_functionsMap;
diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c
index 4f98d754baa..64b5d499a8e 100644
--- a/src/gallium/auxiliary/gallivm/llvm_builtins.c
+++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c
@@ -30,7 +30,7 @@
* Authors:
* Zack Rusin [email protected]
*/
-typedef __attribute__(( ocu_vector_type(4) )) float float4;
+typedef __attribute__(( ext_vector_type(4) )) float float4;
extern float powf(float a, float b);
diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
index 4d658be5208..40addebd8cd 100644
--- a/src/gallium/auxiliary/gallivm/soabuiltins.c
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c
@@ -31,7 +31,7 @@
* Authors:
* Zack Rusin [email protected]
*/
-typedef __attribute__(( ocu_vector_type(4) )) float float4;
+typedef __attribute__(( ext_vector_type(4) )) float float4;
void dp3(float4 *res,
float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp
index c4326de8c53..9d9fd123604 100644
--- a/src/gallium/auxiliary/gallivm/storage.cpp
+++ b/src/gallium/auxiliary/gallivm/storage.cpp
@@ -186,26 +186,26 @@ llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ)
if ((mask & TGSI_WRITEMASK_X)) {
llvm::Value *x = new ExtractElementInst(src, unsigned(0),
name("x"), m_block);
- dst = new InsertElementInst(dst, x, unsigned(0),
- name("dstx"), m_block);
+ dst = InsertElementInst::Create(dst, x, unsigned(0),
+ name("dstx"), m_block);
}
if ((mask & TGSI_WRITEMASK_Y)) {
llvm::Value *y = new ExtractElementInst(src, unsigned(1),
name("y"), m_block);
- dst = new InsertElementInst(dst, y, unsigned(1),
- name("dsty"), m_block);
+ dst = InsertElementInst::Create(dst, y, unsigned(1),
+ name("dsty"), m_block);
}
if ((mask & TGSI_WRITEMASK_Z)) {
llvm::Value *z = new ExtractElementInst(src, unsigned(2),
name("z"), m_block);
- dst = new InsertElementInst(dst, z, unsigned(2),
- name("dstz"), m_block);
+ dst = InsertElementInst::Create(dst, z, unsigned(2),
+ name("dstz"), m_block);
}
if ((mask & TGSI_WRITEMASK_W)) {
llvm::Value *w = new ExtractElementInst(src, unsigned(3),
name("w"), m_block);
- dst = new InsertElementInst(dst, w, unsigned(3),
- name("dstw"), m_block);
+ dst = InsertElementInst::Create(dst, w, unsigned(3),
+ name("dstw"), m_block);
}
return dst;
}
@@ -308,11 +308,11 @@ llvm::Value * Storage::elemPtr(Args arg)
std::vector<Value*> indices;
indices.push_back(constantInt(0));
indices.push_back(constantInt(static_cast<int>(arg)));
- GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT,
- indices.begin(),
- indices.end(),
- name("input_ptr"),
- m_block);
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("input_ptr"),
+ m_block);
return new LoadInst(getElem, name("input_field"), false, m_block);
}
@@ -322,7 +322,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
GetElementPtrInst *getElem = 0;
if (indIdx) {
- getElem = new GetElementPtrInst(ptr,
+ getElem = GetElementPtrInst::Create(ptr,
BinaryOperator::create(Instruction::Add,
indIdx,
constantInt(idx),
@@ -331,7 +331,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
name("field"),
m_block);
} else {
- getElem = new GetElementPtrInst(ptr,
+ getElem = GetElementPtrInst::Create(ptr,
constantInt(idx),
name("field"),
m_block);
@@ -350,7 +350,7 @@ void Storage::setKilElement(llvm::Value *val)
std::vector<Value*> indices;
indices.push_back(constantInt(0));
indices.push_back(constantInt(static_cast<int>(KilArg)));
- GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT,
+ GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT,
indices.begin(),
indices.end(),
name("kil_ptr"),
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
index bb6fe3d7e11..0e6e68c9d70 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.cpp
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -207,11 +207,11 @@ llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index,
indices.push_back(index);
indices.push_back(constantInt(channel));
- GetElementPtrInst *getElem = new GetElementPtrInst(ptr,
- indices.begin(),
- indices.end(),
- name("ptr"),
- m_block);
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr,
+ indices.begin(),
+ indices.end(),
+ name("ptr"),
+ m_block);
return getElem;
}
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index ab9e7a06fba..ab8c851f148 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -1014,7 +1014,7 @@ tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens)
Value *ptr_INPUT = args++;
ptr_INPUT->setName("input");
- BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+ BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
tgsi_parse_init(&parse, tokens);
@@ -1085,7 +1085,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
Value *temps = args++;
temps->setName("temps");
- BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+ BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
tgsi_parse_init(&parse, tokens);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 4b09c80b2a1..49705cb8627 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -193,6 +193,17 @@ pb_reference(struct pb_buffer **dst,
/**
+ * Utility function to check whether a requested alignment is consistent with
+ * the provided alignment or not.
+ */
+static INLINE int
+pb_check_alignment(size_t requested, size_t provided)
+{
+ return requested <= provided && (provided % requested) == 0;
+}
+
+
+/**
* Malloc-based buffer to store data that can't be used by the graphics
* hardware.
*/
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 65b6584003a..27032b0c4c0 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -215,15 +215,21 @@ fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags)
struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pipe_winsys *winsys = fenced_list->winsys;
+ /* Allow concurrent reads */
if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0)
return PIPE_OK;
+ /* Wait for the CPU to finish */
if(fenced_buf->mapcount) {
- /* FIXME */
+ /* FIXME: Use thread conditions variables to signal when mapcount
+ * reaches zero */
debug_warning("attemp to write concurrently to buffer");
+ /* XXX: we must not fail here in order to support texture mipmap generation
return PIPE_ERROR_RETRY;
+ */
}
+ /* Wait for the GPU to finish */
if(fenced_buf->fence) {
if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0)
return PIPE_ERROR_RETRY;
@@ -353,6 +359,16 @@ buffer_fence(struct pb_buffer *buf,
/* FIXME: receive this as a parameter */
unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0;
+ if(fence == fenced_buf->fence) {
+ /* Handle the same fence case specially, not only because it is a fast
+ * path, but mostly to avoid serializing two writes with the same fence,
+ * as that would bring the hardware down to synchronous operation without
+ * any benefit.
+ */
+ fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+ return;
+ }
+
if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) {
/* FIXME: propagate error */
(void)0;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index b2d2520b67e..96f9af3825f 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -118,13 +118,21 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
* Slab sub-allocator.
*/
struct pb_manager *
-pb_slab_manager_create(struct pb_manager *provider,
- const struct pb_desc *desc,
- size_t smallestSize,
- size_t numSizes,
- size_t desiredNumBuffers,
- size_t maxSlabSize,
- size_t pageAlignment);
+pb_slab_manager_create(struct pb_manager *provider,
+ size_t bufSize,
+ size_t slabSize,
+ const struct pb_desc *desc);
+
+/**
+ * Allow a range of buffer size, by aggregating multiple slabs sub-allocators
+ * with different bucket sizes.
+ */
+struct pb_manager *
+pb_slab_range_manager_create(struct pb_manager *provider,
+ size_t minBufSize,
+ size_t maxBufSize,
+ size_t slabSize,
+ const struct pb_desc *desc);
/**
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 06de0bb6c37..543fd51253c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -136,7 +136,7 @@ _pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
while(curr != &mgr->delayed) {
buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(util_time_timeout(&buf->start, &buf->end, &now) != 0)
+ if(!util_time_timeout(&buf->start, &buf->end, &now))
break;
_pb_cache_buffer_destroy(buf);
@@ -202,6 +202,24 @@ pb_cache_buffer_vtbl = {
};
+static INLINE boolean
+pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ /* TODO: be more lenient with size */
+ if(buf->base.base.size != size)
+ return FALSE;
+
+ if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
+ return FALSE;
+
+ /* XXX: check usage too? */
+
+ return TRUE;
+}
+
+
static struct pb_buffer *
pb_cache_manager_create_buffer(struct pb_manager *_mgr,
size_t size,
@@ -209,29 +227,45 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
{
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
struct pb_cache_buffer *buf;
+ struct pb_cache_buffer *curr_buf;
struct list_head *curr, *next;
struct util_time now;
- util_time_get(&now);
+ _glthread_LOCK_MUTEX(mgr->mutex);
+
+ buf = NULL;
curr = mgr->delayed.next;
next = curr->next;
+
+ /* search in the expired buffers, freeing them in the process */
+ util_time_get(&now);
while(curr != &mgr->delayed) {
- buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
-
- if(buf->base.base.size == size &&
- buf->base.base.alignment >= desc->alignment &&
- (buf->base.base.alignment % desc->alignment) == 0 &&
- /* buf->base.base.usage == usage */ 1) {
- ++buf->base.base.refcount;
- return &buf->base;
- }
-
- if(util_time_timeout(&buf->start, &buf->end, &now) != 0)
- _pb_cache_buffer_destroy(buf);
+ curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+ if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
+ buf = curr_buf;
+ else if(util_time_timeout(&curr_buf->start, &curr_buf->end, &now))
+ _pb_cache_buffer_destroy(curr_buf);
+ curr = next;
+ next = curr->next;
+ }
+ /* keep searching in the hot buffers */
+ while(!buf && curr != &mgr->delayed) {
+ curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+ if(pb_cache_is_buffer_compat(curr_buf, size, desc))
+ buf = curr_buf;
curr = next;
next = curr->next;
}
+
+ if(buf) {
+ LIST_DEL(&buf->head);
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
+ ++buf->base.base.refcount;
+ return &buf->base;
+ }
+
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
buf = CALLOC_STRUCT(pb_cache_buffer);
if(!buf)
@@ -243,6 +277,11 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
return NULL;
}
+ assert(buf->buffer->base.refcount >= 1);
+ assert(pb_check_alignment(desc->alignment, buf->buffer->base.alignment));
+ assert((buf->buffer->base.usage & desc->usage) == desc->usage);
+ assert(buf->buffer->base.size >= size);
+
buf->base.base.refcount = 1;
buf->base.base.alignment = buf->buffer->base.alignment;
buf->base.base.usage = buf->buffer->base.usage;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
index bffca5b2449..9d809e2f9b5 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -30,7 +30,7 @@
* \file
* A buffer manager that wraps buffers in fenced buffers.
*
- * \author Jos� Fonseca <[email protected]>
+ * \author José Fonseca <[email protected]>
*/
@@ -101,7 +101,8 @@ fenced_bufmgr_destroy(struct pb_manager *mgr)
fenced_buffer_list_destroy(fenced_mgr->fenced_list);
- fenced_mgr->provider->destroy(fenced_mgr->provider);
+ if(fenced_mgr->provider)
+ fenced_mgr->provider->destroy(fenced_mgr->provider);
FREE(fenced_mgr);
}
@@ -113,6 +114,9 @@ fenced_bufmgr_create(struct pb_manager *provider,
{
struct fenced_pb_manager *fenced_mgr;
+ if(!provider)
+ return NULL;
+
fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr));
if (!fenced_mgr)
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 676e8e29b9c..b931455056e 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -70,19 +70,24 @@ struct pb_slab
size_t numBuffers;
size_t numFree;
struct pb_slab_buffer *buffers;
- struct pb_slab_size_header *header;
+ struct pb_slab_manager *mgr;
struct pb_buffer *bo;
- size_t pageAlignment;
void *virtual;
};
-struct pb_slab_size_header
+struct pb_slab_manager
{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+ size_t bufSize;
+ size_t slabSize;
+ struct pb_desc desc;
+
struct list_head slabs;
struct list_head freeSlabs;
- struct pb_slab_manager *pool;
- size_t bufSize;
+
_glthread_Mutex mutex;
};
@@ -90,19 +95,18 @@ struct pb_slab_size_header
* The data of this structure remains constant after
* initialization and thus needs no mutex protection.
*/
-struct pb_slab_manager
+struct pb_slab_range_manager
{
struct pb_manager base;
+ struct pb_manager *provider;
+ size_t minBufSize;
+ size_t maxBufSize;
struct pb_desc desc;
+
+ unsigned numBuckets;
size_t *bucketSizes;
- size_t numBuckets;
- size_t pageSize;
- struct pb_manager *provider;
- unsigned pageAlignment;
- unsigned maxSlabSize;
- unsigned desiredNumBuffers;
- struct pb_slab_size_header *headers;
+ struct pb_manager **buckets;
};
@@ -122,8 +126,16 @@ pb_slab_manager(struct pb_manager *mgr)
}
+static INLINE struct pb_slab_range_manager *
+pb_slab_range_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pb_slab_range_manager *)mgr;
+}
+
+
/**
- * Delete a buffer from the slab header delayed list and put
+ * Delete a buffer from the slab delayed list and put
* it on the slab FREE list.
*/
static void
@@ -131,10 +143,10 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
struct pb_slab *slab = buf->slab;
- struct pb_slab_size_header *header = slab->header;
+ struct pb_slab_manager *mgr = slab->mgr;
struct list_head *list = &buf->head;
- _glthread_LOCK_MUTEX(header->mutex);
+ _glthread_LOCK_MUTEX(mgr->mutex);
assert(buf->base.base.refcount == 0);
@@ -145,21 +157,21 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf)
slab->numFree++;
if (slab->head.next == &slab->head)
- LIST_ADDTAIL(&slab->head, &header->slabs);
+ LIST_ADDTAIL(&slab->head, &mgr->slabs);
if (slab->numFree == slab->numBuffers) {
list = &slab->head;
LIST_DEL(list);
- LIST_ADDTAIL(list, &header->freeSlabs);
+ LIST_ADDTAIL(list, &mgr->freeSlabs);
}
- if (header->slabs.next == &header->slabs || slab->numFree
+ if (mgr->slabs.next == &mgr->slabs || slab->numFree
!= slab->numBuffers) {
struct list_head *next;
- for (list = header->freeSlabs.next, next = list->next; list
- != &header->freeSlabs; list = next, next = list->next) {
+ for (list = mgr->freeSlabs.next, next = list->next; list
+ != &mgr->freeSlabs; list = next, next = list->next) {
slab = LIST_ENTRY(struct pb_slab, list, head);
@@ -170,7 +182,7 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf)
}
}
- _glthread_UNLOCK_MUTEX(header->mutex);
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
}
@@ -217,15 +229,13 @@ pb_slab_buffer_vtbl = {
static enum pipe_error
-pb_slab_create(struct pb_slab_size_header *header)
+pb_slab_create(struct pb_slab_manager *mgr)
{
- struct pb_slab_manager *pool = header->pool;
- size_t size = header->bufSize * pool->desiredNumBuffers;
struct pb_slab *slab;
struct pb_slab_buffer *buf;
- size_t numBuffers;
- int ret;
+ unsigned numBuffers;
unsigned i;
+ enum pipe_error ret;
slab = CALLOC_STRUCT(pb_slab);
if (!slab)
@@ -236,22 +246,23 @@ pb_slab_create(struct pb_slab_size_header *header)
* to efficiently reuse slabs.
*/
- size = (size <= pool->maxSlabSize) ? size : pool->maxSlabSize;
- size = (size + pool->pageSize - 1) & ~(pool->pageSize - 1);
-
- slab->bo = pool->provider->create_buffer(pool->provider, size, &pool->desc);
- if(!slab->bo)
+ slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc);
+ if(!slab->bo) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err0;
+ }
slab->virtual = pb_map(slab->bo,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
- if(!slab->virtual)
+ PIPE_BUFFER_USAGE_CPU_READ |
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ if(!slab->virtual) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err1;
+ }
pb_unmap(slab->bo);
- numBuffers = slab->bo->base.size / header->bufSize;
+ numBuffers = slab->bo->base.size / mgr->bufSize;
slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers));
if (!slab->buffers) {
@@ -263,17 +274,17 @@ pb_slab_create(struct pb_slab_size_header *header)
LIST_INITHEAD(&slab->freeBuffers);
slab->numBuffers = numBuffers;
slab->numFree = 0;
- slab->header = header;
+ slab->mgr = mgr;
buf = slab->buffers;
for (i=0; i < numBuffers; ++i) {
buf->base.base.refcount = 0;
- buf->base.base.size = header->bufSize;
+ buf->base.base.size = mgr->bufSize;
buf->base.base.alignment = 0;
buf->base.base.usage = 0;
buf->base.vtbl = &pb_slab_buffer_vtbl;
buf->slab = slab;
- buf->start = i* header->bufSize;
+ buf->start = i* mgr->bufSize;
buf->mapCount = 0;
_glthread_INIT_COND(buf->event);
LIST_ADDTAIL(&buf->head, &slab->freeBuffers);
@@ -281,7 +292,7 @@ pb_slab_create(struct pb_slab_size_header *header)
buf++;
}
- LIST_ADDTAIL(&slab->head, &header->slabs);
+ LIST_ADDTAIL(&slab->head, &mgr->slabs);
return PIPE_OK;
@@ -294,50 +305,47 @@ out_err0:
static struct pb_buffer *
-pb_slab_manager_create_buffer(struct pb_manager *_pool,
+pb_slab_manager_create_buffer(struct pb_manager *_mgr,
size_t size,
const struct pb_desc *desc)
{
- struct pb_slab_manager *pool = pb_slab_manager(_pool);
- struct pb_slab_size_header *header;
- unsigned i;
+ struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
static struct pb_slab_buffer *buf;
struct pb_slab *slab;
struct list_head *list;
int count = DRI_SLABPOOL_ALLOC_RETRIES;
- /*
- * FIXME: Check for compatibility.
- */
-
- header = pool->headers;
- for (i=0; i<pool->numBuckets; ++i) {
- if (header->bufSize >= size)
- break;
- header++;
- }
-
- if (i >= pool->numBuckets)
- /* Fall back to allocate a buffer object directly from the provider. */
- return pool->provider->create_buffer(pool->provider, size, desc);
-
+ /* check size */
+ assert(size == mgr->bufSize);
+ if(size != mgr->bufSize)
+ return NULL;
+
+ /* check if we can provide the requested alignment */
+ assert(pb_check_alignment(desc->alignment, mgr->desc.alignment));
+ if(!pb_check_alignment(desc->alignment, mgr->desc.alignment))
+ return NULL;
+ assert(pb_check_alignment(desc->alignment, mgr->bufSize));
+ if(!pb_check_alignment(desc->alignment, mgr->bufSize))
+ return NULL;
- _glthread_LOCK_MUTEX(header->mutex);
- while (header->slabs.next == &header->slabs && count > 0) {
- if (header->slabs.next != &header->slabs)
+ /* XXX: check for compatible buffer usage too? */
+
+ _glthread_LOCK_MUTEX(mgr->mutex);
+ while (mgr->slabs.next == &mgr->slabs && count > 0) {
+ if (mgr->slabs.next != &mgr->slabs)
break;
- _glthread_UNLOCK_MUTEX(header->mutex);
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
if (count != DRI_SLABPOOL_ALLOC_RETRIES)
util_time_sleep(1);
- _glthread_LOCK_MUTEX(header->mutex);
- (void) pb_slab_create(header);
+ _glthread_LOCK_MUTEX(mgr->mutex);
+ (void) pb_slab_create(mgr);
count--;
}
- list = header->slabs.next;
- if (list == &header->slabs) {
- _glthread_UNLOCK_MUTEX(header->mutex);
+ list = mgr->slabs.next;
+ if (list == &mgr->slabs) {
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
return NULL;
}
slab = LIST_ENTRY(struct pb_slab, list, head);
@@ -347,83 +355,141 @@ pb_slab_manager_create_buffer(struct pb_manager *_pool,
list = slab->freeBuffers.next;
LIST_DELINIT(list);
- _glthread_UNLOCK_MUTEX(header->mutex);
+ _glthread_UNLOCK_MUTEX(mgr->mutex);
buf = LIST_ENTRY(struct pb_slab_buffer, list, head);
+
++buf->base.base.refcount;
+ buf->base.base.alignment = desc->alignment;
+ buf->base.base.usage = desc->usage;
+
return &buf->base;
}
static void
-pb_slab_manager_destroy(struct pb_manager *_pool)
+pb_slab_manager_destroy(struct pb_manager *_mgr)
{
- struct pb_slab_manager *pool = pb_slab_manager(_pool);
+ struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
- FREE(pool->headers);
- FREE(pool->bucketSizes);
- FREE(pool);
+ /* TODO: cleanup all allocated buffers */
+ FREE(mgr);
}
struct pb_manager *
-pb_slab_manager_create(struct pb_manager *provider,
- const struct pb_desc *desc,
- size_t smallestSize,
- size_t numSizes,
- size_t desiredNumBuffers,
- size_t maxSlabSize,
- size_t pageAlignment)
+pb_slab_manager_create(struct pb_manager *provider,
+ size_t bufSize,
+ size_t slabSize,
+ const struct pb_desc *desc)
{
- struct pb_slab_manager *pool;
- size_t i;
+ struct pb_slab_manager *mgr;
+
+ mgr = CALLOC_STRUCT(pb_slab_manager);
+ if (!mgr)
+ return NULL;
+
+ mgr->base.destroy = pb_slab_manager_destroy;
+ mgr->base.create_buffer = pb_slab_manager_create_buffer;
+
+ mgr->provider = provider;
+ mgr->bufSize = bufSize;
+ mgr->slabSize = slabSize;
+ mgr->desc = *desc;
+
+ LIST_INITHEAD(&mgr->slabs);
+ LIST_INITHEAD(&mgr->freeSlabs);
+
+ _glthread_INIT_MUTEX(mgr->mutex);
+
+ return &mgr->base;
+}
+
+
+static struct pb_buffer *
+pb_slab_range_manager_create_buffer(struct pb_manager *_mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
+ size_t bufSize;
+ unsigned i;
+
+ bufSize = mgr->minBufSize;
+ for (i = 0; i < mgr->numBuckets; ++i) {
+ if(bufSize >= size)
+ return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc);
+ bufSize *= 2;
+ }
+
+ /* Fall back to allocate a buffer object directly from the provider. */
+ return mgr->provider->create_buffer(mgr->provider, size, desc);
+}
- pool = CALLOC_STRUCT(pb_slab_manager);
- if (!pool)
+
+static void
+pb_slab_range_manager_destroy(struct pb_manager *_mgr)
+{
+ struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
+ unsigned i;
+
+ for (i = 0; i < mgr->numBuckets; ++i)
+ mgr->buckets[i]->destroy(mgr->buckets[i]);
+ FREE(mgr->buckets);
+ FREE(mgr->bucketSizes);
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_slab_range_manager_create(struct pb_manager *provider,
+ size_t minBufSize,
+ size_t maxBufSize,
+ size_t slabSize,
+ const struct pb_desc *desc)
+{
+ struct pb_slab_range_manager *mgr;
+ size_t bufSize;
+ unsigned i;
+
+ mgr = CALLOC_STRUCT(pb_slab_range_manager);
+ if (!mgr)
goto out_err0;
- pool->bucketSizes = CALLOC(numSizes, sizeof(*pool->bucketSizes));
- if (!pool->bucketSizes)
- goto out_err1;
+ mgr->base.destroy = pb_slab_range_manager_destroy;
+ mgr->base.create_buffer = pb_slab_range_manager_create_buffer;
- pool->headers = CALLOC(numSizes, sizeof(*pool->headers));
- if (!pool->headers)
- goto out_err2;
-
- pool->desc = *desc;
- pool->numBuckets = numSizes;
-#ifdef WIN32
- pool->pageSize = 4096;
-#else
- pool->pageSize = getpagesize();
-#endif
- pool->provider = provider;
- pool->pageAlignment = pageAlignment;
- pool->maxSlabSize = maxSlabSize;
- pool->desiredNumBuffers = desiredNumBuffers;
-
- for (i=0; i<pool->numBuckets; ++i) {
- struct pb_slab_size_header *header = &pool->headers[i];
-
- pool->bucketSizes[i] = (smallestSize << i);
-
- _glthread_INIT_MUTEX(header->mutex);
-
- LIST_INITHEAD(&header->slabs);
- LIST_INITHEAD(&header->freeSlabs);
-
- header->pool = pool;
- header->bufSize = (smallestSize << i);
+ mgr->provider = provider;
+ mgr->minBufSize = minBufSize;
+ mgr->maxBufSize = maxBufSize;
+
+ mgr->numBuckets = 1;
+ bufSize = minBufSize;
+ while(bufSize < maxBufSize) {
+ bufSize *= 2;
+ ++mgr->numBuckets;
}
+
+ mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets));
+ if (!mgr->buckets)
+ goto out_err1;
- pool->base.destroy = pb_slab_manager_destroy;
- pool->base.create_buffer = pb_slab_manager_create_buffer;
+ bufSize = minBufSize;
+ for (i = 0; i < mgr->numBuckets; ++i) {
+ mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc);
+ if(!mgr->buckets[i])
+ goto out_err2;
+ bufSize *= 2;
+ }
- return &pool->base;
+ return &mgr->base;
out_err2:
- FREE(pool->bucketSizes);
+ for (i = 0; i < mgr->numBuckets; ++i)
+ if(mgr->buckets[i])
+ mgr->buckets[i]->destroy(mgr->buckets[i]);
+ FREE(mgr->buckets);
out_err1:
- FREE(pool);
+ FREE(mgr);
out_err0:
return NULL;
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index eb3359750b4..f01e12faa07 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -26,14 +26,29 @@
**************************************************************************/
+#include "pipe/p_debug.h"
#include "rtasm_cpu.h"
+static boolean rtasm_sse_enabled(void)
+{
+ static boolean firsttime = 1;
+ static boolean enabled;
+
+ /* This gets called quite often at the moment:
+ */
+ if (firsttime) {
+ enabled = !debug_get_bool_option("GALLIUM_NOSSE", FALSE);
+ firsttime = FALSE;
+ }
+ return enabled;
+}
+
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__)
- return 1;
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ return rtasm_sse_enabled();
#else
return 0;
#endif
@@ -42,8 +57,8 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__)
- return 1;
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ return rtasm_sse_enabled();
#else
return 0;
#endif
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index aea8b28e584..3cd45d7dd9e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -33,15 +33,114 @@
#define DISASSEM 0
#define X86_TWOB 0x0f
-static unsigned char *cptr( void (*label)() )
-{
- return (unsigned char *) label;
-}
+
+#define DUMP_SSE 0
+
+#if DUMP_SSE
+
+static void
+_print_reg(
+ struct x86_reg reg )
+{
+ if (reg.mod != mod_REG)
+ debug_printf( "[" );
+
+ switch( reg.file ) {
+ case file_REG32:
+ switch( reg.idx ) {
+ case reg_AX: debug_printf( "EAX" ); break;
+ case reg_CX: debug_printf( "ECX" ); break;
+ case reg_DX: debug_printf( "EDX" ); break;
+ case reg_BX: debug_printf( "EBX" ); break;
+ case reg_SP: debug_printf( "ESP" ); break;
+ case reg_BP: debug_printf( "EBP" ); break;
+ case reg_SI: debug_printf( "ESI" ); break;
+ case reg_DI: debug_printf( "EDI" ); break;
+ }
+ break;
+ case file_MMX:
+ debug_printf( "MMX%u", reg.idx );
+ break;
+ case file_XMM:
+ debug_printf( "XMM%u", reg.idx );
+ break;
+ case file_x87:
+ debug_printf( "fp%u", reg.idx );
+ break;
+ }
+
+ if (reg.mod == mod_DISP8 ||
+ reg.mod == mod_DISP32)
+ debug_printf("+%d", reg.disp);
+
+ if (reg.mod != mod_REG)
+ debug_printf( "]" );
+}
+
+
+#define DUMP_START() debug_printf( "\n" )
+#define DUMP_END() debug_printf( "\n" )
+
+#define DUMP() do { \
+ const char *foo = __FUNCTION__; \
+ while (*foo && *foo != '_') \
+ foo++; \
+ if (*foo) \
+ foo++; \
+ debug_printf( "\n% 15s ", foo ); \
+} while (0)
+
+#define DUMP_I( I ) do { \
+ DUMP(); \
+ debug_printf( "%u", I ); \
+} while( 0 )
+
+#define DUMP_R( R0 ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+} while( 0 )
+
+#define DUMP_RR( R0, R1 ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", " ); \
+ _print_reg( R1 ); \
+} while( 0 )
+
+#define DUMP_RI( R0, I ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", %u", I ); \
+} while( 0 )
+
+#define DUMP_RRI( R0, R1, I ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", " ); \
+ _print_reg( R1 ); \
+ debug_printf( ", %u", I ); \
+} while( 0 )
+
+#else
+
+#define DUMP_START()
+#define DUMP_END()
+#define DUMP( )
+#define DUMP_I( I )
+#define DUMP_R( R0 )
+#define DUMP_RR( R0, R1 )
+#define DUMP_RI( R0, I )
+#define DUMP_RRI( R0, R1, I )
+
+#endif
static void do_realloc( struct x86_function *p )
{
- if (p->size == 0) {
+ if (p->store == p->error_overflow) {
+ p->csr = p->store;
+ }
+ else if (p->size == 0) {
p->size = 1024;
p->store = rtasm_exec_malloc(p->size);
p->csr = p->store;
@@ -51,10 +150,22 @@ static void do_realloc( struct x86_function *p )
unsigned char *tmp = p->store;
p->size *= 2;
p->store = rtasm_exec_malloc(p->size);
- memcpy(p->store, tmp, used);
- p->csr = p->store + used;
+
+ if (p->store) {
+ memcpy(p->store, tmp, used);
+ p->csr = p->store + used;
+ }
+ else {
+ p->csr = p->store;
+ }
+
rtasm_exec_free(tmp);
}
+
+ if (p->store == NULL) {
+ p->store = p->csr = p->error_overflow;
+ p->size = sizeof(p->error_overflow);
+ }
}
/* Emit bytes to the instruction stream:
@@ -253,6 +364,7 @@ void x86_jcc( struct x86_function *p,
unsigned char *label )
{
intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2);
+ DUMP_I(cc);
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
@@ -270,6 +382,7 @@ void x86_jcc( struct x86_function *p,
unsigned char *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc )
{
+ DUMP_I(cc);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, 0);
return x86_get_label(p);
@@ -277,6 +390,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p,
unsigned char *x86_jmp_forward( struct x86_function *p)
{
+ DUMP();
emit_1ub(p, 0xe9);
emit_1i(p, 0);
return x86_get_label(p);
@@ -284,6 +398,8 @@ unsigned char *x86_jmp_forward( struct x86_function *p)
unsigned char *x86_call_forward( struct x86_function *p)
{
+ DUMP();
+
emit_1ub(p, 0xe8);
emit_1i(p, 0);
return x86_get_label(p);
@@ -299,23 +415,31 @@ void x86_fixup_fwd_jump( struct x86_function *p,
void x86_jmp( struct x86_function *p, unsigned char *label)
{
+ DUMP_I( label );
emit_1ub(p, 0xe9);
emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4);
}
#if 0
+static unsigned char *cptr( void (*label)() )
+{
+ return (unsigned char *) label;
+}
+
/* This doesn't work once we start reallocating & copying the
* generated code on buffer fills, because the call is relative to the
* current pc.
*/
void x86_call( struct x86_function *p, void (*label)())
{
+ DUMP_I( label );
emit_1ub(p, 0xe8);
emit_1i(p, cptr(label) - x86_get_label(p) - 4);
}
#else
void x86_call( struct x86_function *p, struct x86_reg reg)
{
+ DUMP_R( reg );
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 2, reg);
}
@@ -328,6 +452,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg)
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
+ DUMP_RI( dst, imm );
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
emit_1i(p, imm);
@@ -336,6 +461,7 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x50 + reg.idx);
p->stack_offset += 4;
@@ -344,6 +470,7 @@ void x86_push( struct x86_function *p,
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
p->stack_offset -= 4;
@@ -352,6 +479,7 @@ void x86_pop( struct x86_function *p,
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x40 + reg.idx);
}
@@ -359,17 +487,20 @@ void x86_inc( struct x86_function *p,
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x48 + reg.idx);
}
void x86_ret( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0xc3);
}
void x86_sahf( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0x9e);
}
@@ -377,6 +508,7 @@ void x86_mov( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
@@ -384,6 +516,7 @@ void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x33, 0x31, dst, src );
}
@@ -391,6 +524,7 @@ void x86_cmp( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x3b, 0x39, dst, src );
}
@@ -398,6 +532,7 @@ void x86_lea( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, 0x8d);
emit_modrm( p, dst, src );
}
@@ -406,6 +541,7 @@ void x86_test( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, 0x85);
emit_modrm( p, dst, src );
}
@@ -414,20 +550,36 @@ void x86_add( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm(p, 0x03, 0x01, dst, src );
}
+/* Calculate EAX * src, results in EDX:EAX.
+ */
void x86_mul( struct x86_function *p,
struct x86_reg src )
{
- assert (src.file == file_REG32 && src.mod == mod_REG);
- emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
+ DUMP_R( src );
+ emit_1ub(p, 0xf7);
+ emit_modrm_noreg(p, 4, src );
+}
+
+
+void x86_imul( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0xAF);
+ emit_modrm(p, dst, src);
}
+
void x86_sub( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm(p, 0x2b, 0x29, dst, src );
}
@@ -435,6 +587,7 @@ void x86_or( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x0b, 0x09, dst, src );
}
@@ -442,6 +595,7 @@ void x86_and( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x23, 0x21, dst, src );
}
@@ -456,6 +610,7 @@ void sse_movss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, 0xF3, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
@@ -464,6 +619,7 @@ void sse_movaps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x28, 0x29, dst, src );
}
@@ -472,6 +628,7 @@ void sse_movups( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
@@ -480,6 +637,7 @@ void sse_movhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
@@ -489,6 +647,7 @@ void sse_movlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
@@ -498,6 +657,7 @@ void sse_maxps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
@@ -506,6 +666,7 @@ void sse_maxss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
@@ -514,6 +675,7 @@ void sse_divss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
emit_modrm( p, dst, src );
}
@@ -522,6 +684,7 @@ void sse_minps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5D);
emit_modrm( p, dst, src );
}
@@ -530,6 +693,7 @@ void sse_subps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5C);
emit_modrm( p, dst, src );
}
@@ -538,6 +702,7 @@ void sse_mulps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
@@ -546,6 +711,7 @@ void sse_mulss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
@@ -554,6 +720,7 @@ void sse_addps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
@@ -562,6 +729,7 @@ void sse_addss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
@@ -570,6 +738,7 @@ void sse_andnps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x55);
emit_modrm( p, dst, src );
}
@@ -578,6 +747,7 @@ void sse_andps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x54);
emit_modrm( p, dst, src );
}
@@ -586,6 +756,7 @@ void sse_rsqrtps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
}
@@ -594,6 +765,7 @@ void sse_rsqrtss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
@@ -603,6 +775,7 @@ void sse_movhlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x12);
emit_modrm( p, dst, src );
@@ -612,6 +785,7 @@ void sse_movlhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x16);
emit_modrm( p, dst, src );
@@ -621,6 +795,7 @@ void sse_orps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x56);
emit_modrm( p, dst, src );
}
@@ -629,6 +804,7 @@ void sse_xorps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x57);
emit_modrm( p, dst, src );
}
@@ -637,6 +813,7 @@ void sse_cvtps2pi( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_XMM || src.mod != mod_REG));
@@ -646,36 +823,48 @@ void sse_cvtps2pi( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtdq2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0x5b);
+ emit_modrm( p, dst, src );
+}
+
/* Shufps can also be used to implement a reduced swizzle when dest ==
* arg0.
*/
void sse_shufps( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char shuf)
{
+ DUMP_RRI( dst, src, shuf );
emit_2ub(p, X86_TWOB, 0xC6);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
void sse_cmpps( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char cc)
{
+ DUMP_RRI( dst, src, cc );
emit_2ub(p, X86_TWOB, 0xC2);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, cc);
}
void sse_pmovmskb( struct x86_function *p,
- struct x86_reg dest,
+ struct x86_reg dst,
struct x86_reg src)
{
- emit_3ub(p, 0x66, X86_TWOB, 0xD7);
- emit_modrm(p, dest, src);
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, X86_TWOB, 0xD7);
+ emit_modrm(p, dst, src);
}
/***********************************************************************
@@ -686,12 +875,13 @@ void sse_pmovmskb( struct x86_function *p,
* Perform a reduced swizzle:
*/
void sse2_pshufd( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char shuf)
{
+ DUMP_RRI( dst, src, shuf );
emit_3ub(p, 0x66, X86_TWOB, 0x70);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
@@ -699,6 +889,7 @@ void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
emit_modrm( p, dst, src );
}
@@ -707,6 +898,7 @@ void sse2_cvtps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x5B);
emit_modrm( p, dst, src );
}
@@ -715,6 +907,7 @@ void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x6B);
emit_modrm( p, dst, src );
}
@@ -723,6 +916,7 @@ void sse2_packsswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x63);
emit_modrm( p, dst, src );
}
@@ -731,14 +925,26 @@ void sse2_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x67);
emit_modrm( p, dst, src );
}
+void sse2_punpcklbw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, X86_TWOB, 0x60);
+ emit_modrm( p, dst, src );
+}
+
+
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
@@ -747,6 +953,7 @@ void sse2_rcpss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
@@ -755,6 +962,7 @@ void sse2_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, 0x66, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}
@@ -767,30 +975,35 @@ void sse2_movd( struct x86_function *p,
*/
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 2, dst);
}
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
}
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
}
void x87_fldz( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xee);
}
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_REG32);
assert(arg.mod != mod_REG);
emit_1ub(p, 0xd9);
@@ -799,26 +1012,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg )
void x87_fld1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe8);
}
void x87_fldl2e( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xea);
}
void x87_fldln2( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xed);
}
void x87_fwait( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0x9b);
}
void x87_fnclex( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xdb, 0xe2);
}
@@ -855,49 +1073,55 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86
assert(0);
}
-void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xc8,
0xdc, 0xc8,
4);
}
-void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xe0,
0xdc, 0xe8,
4);
}
-void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xe8,
0xdc, 0xe0,
5);
}
-void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xc0,
0xdc, 0xc0,
0);
}
-void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xf0,
0xdc, 0xf8,
6);
}
-void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xf8,
0xdc, 0xf0,
7);
@@ -905,6 +1129,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
@@ -912,6 +1137,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst )
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
@@ -919,6 +1145,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst )
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
@@ -926,6 +1153,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
@@ -933,6 +1161,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst )
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
@@ -940,6 +1169,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst )
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
@@ -947,70 +1177,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe0+arg.idx);
}
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
}
void x87_fucompp( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xda, 0xe9);
}
void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xd9, 0xc8+arg.idx);
}
void x87_fabs( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe1);
}
void x87_fchs( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe0);
}
void x87_fcos( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xff);
}
void x87_fprndint( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfc);
}
void x87_fscale( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfd);
}
void x87_fsin( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfe);
}
void x87_fsincos( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfb);
}
void x87_fsqrt( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfa);
}
void x87_fxtract( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf4);
}
@@ -1020,6 +1263,7 @@ void x87_fxtract( struct x86_function *p )
*/
void x87_f2xm1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf0);
}
@@ -1028,6 +1272,7 @@ void x87_f2xm1( struct x86_function *p )
*/
void x87_fyl2x( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf1);
}
@@ -1038,12 +1283,14 @@ void x87_fyl2x( struct x86_function *p )
*/
void x87_fyl2xp1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf9);
}
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
if (arg.file == file_x87)
emit_2ub(p, 0xd9, 0xc0 + arg.idx);
else {
@@ -1054,6 +1301,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg )
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd0 + dst.idx);
else {
@@ -1064,6 +1312,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst )
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd8 + dst.idx);
else {
@@ -1074,6 +1323,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst )
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd0 + dst.idx);
else {
@@ -1084,6 +1334,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst )
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd8 + dst.idx);
else {
@@ -1095,6 +1346,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst )
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_REG32);
if (dst.idx == reg_AX &&
@@ -1115,6 +1367,7 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
void mmx_emms( struct x86_function *p )
{
+ DUMP();
assert(p->need_emms);
emit_2ub(p, 0x0f, 0x77);
p->need_emms = 0;
@@ -1124,6 +1377,7 @@ void mmx_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
@@ -1137,6 +1391,7 @@ void mmx_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
@@ -1150,6 +1405,7 @@ void mmx_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
@@ -1159,6 +1415,7 @@ void mmx_movq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6f, 0x7f, dst, src );
@@ -1186,18 +1443,25 @@ void x86_init_func( struct x86_function *p )
p->size = 0;
p->store = NULL;
p->csr = p->store;
+ DUMP_START();
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
{
p->size = code_size;
p->store = rtasm_exec_malloc(code_size);
+ if (p->store == NULL) {
+ p->store = p->error_overflow;
+ }
p->csr = p->store;
+ DUMP_START();
}
void x86_release_func( struct x86_function *p )
{
- rtasm_exec_free(p->store);
+ if (p->store && p->store != p->error_overflow)
+ rtasm_exec_free(p->store);
+
p->store = NULL;
p->csr = NULL;
p->size = 0;
@@ -1206,9 +1470,14 @@ void x86_release_func( struct x86_function *p )
void (*x86_get_func( struct x86_function *p ))(void)
{
+ DUMP_END();
if (DISASSEM && p->store)
debug_printf("disassemble %p %p\n", p->store, p->csr);
- return (void (*)(void)) p->store;
+
+ if (p->store == p->error_overflow)
+ return (void (*)(void)) NULL;
+ else
+ return (void (*)(void)) p->store;
}
#else
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 606b41eb358..695a1cef4e3 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -43,6 +43,7 @@ struct x86_function {
unsigned char *csr;
unsigned stack_offset;
int need_emms;
+ unsigned char error_overflow[4];
const char *fn;
};
@@ -165,6 +166,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -202,6 +204,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -211,6 +214,7 @@ void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
+void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 78e7dec5690..29e104bbd17 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -287,10 +287,10 @@ micro_abs(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) fabs( (double) src->f[0] );
- dst->f[1] = (float) fabs( (double) src->f[1] );
- dst->f[2] = (float) fabs( (double) src->f[2] );
- dst->f[3] = (float) fabs( (double) src->f[3] );
+ dst->f[0] = fabsf( src->f[0] );
+ dst->f[1] = fabsf( src->f[1] );
+ dst->f[2] = fabsf( src->f[2] );
+ dst->f[3] = fabsf( src->f[3] );
}
static void
@@ -334,10 +334,10 @@ micro_ceil(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) ceil( (double) src->f[0] );
- dst->f[1] = (float) ceil( (double) src->f[1] );
- dst->f[2] = (float) ceil( (double) src->f[2] );
- dst->f[3] = (float) ceil( (double) src->f[3] );
+ dst->f[0] = ceilf( src->f[0] );
+ dst->f[1] = ceilf( src->f[1] );
+ dst->f[2] = ceilf( src->f[2] );
+ dst->f[3] = ceilf( src->f[3] );
}
static void
@@ -345,10 +345,10 @@ micro_cos(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) cos( (double) src->f[0] );
- dst->f[1] = (float) cos( (double) src->f[1] );
- dst->f[2] = (float) cos( (double) src->f[2] );
- dst->f[3] = (float) cos( (double) src->f[3] );
+ dst->f[0] = cosf( src->f[0] );
+ dst->f[1] = cosf( src->f[1] );
+ dst->f[2] = cosf( src->f[2] );
+ dst->f[3] = cosf( src->f[3] );
}
static void
@@ -430,10 +430,10 @@ micro_exp2(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
- dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
- dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
- dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
- dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
+ dst->f[0] = powf( 2.0f, src->f[0] );
+ dst->f[1] = powf( 2.0f, src->f[1] );
+ dst->f[2] = powf( 2.0f, src->f[2] );
+ dst->f[3] = powf( 2.0f, src->f[3] );
}
static void
@@ -463,10 +463,10 @@ micro_flr(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) floor( (double) src->f[0] );
- dst->f[1] = (float) floor( (double) src->f[1] );
- dst->f[2] = (float) floor( (double) src->f[2] );
- dst->f[3] = (float) floor( (double) src->f[3] );
+ dst->f[0] = floorf( src->f[0] );
+ dst->f[1] = floorf( src->f[1] );
+ dst->f[2] = floorf( src->f[2] );
+ dst->f[3] = floorf( src->f[3] );
}
static void
@@ -474,10 +474,10 @@ micro_frc(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
- dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
- dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
- dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
+ dst->f[0] = src->f[0] - floorf( src->f[0] );
+ dst->f[1] = src->f[1] - floorf( src->f[1] );
+ dst->f[2] = src->f[2] - floorf( src->f[2] );
+ dst->f[3] = src->f[3] - floorf( src->f[3] );
}
static void
@@ -510,10 +510,10 @@ micro_lg2(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
- dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
- dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
- dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
+ dst->f[0] = logf( src->f[0] ) * 1.442695f;
+ dst->f[1] = logf( src->f[1] ) * 1.442695f;
+ dst->f[2] = logf( src->f[2] ) * 1.442695f;
+ dst->f[3] = logf( src->f[3] ) * 1.442695f;
}
static void
@@ -764,10 +764,10 @@ micro_pow(
const union tgsi_exec_channel *src0,
const union tgsi_exec_channel *src1 )
{
- dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
- dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
- dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
- dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
+ dst->f[0] = powf( src0->f[0], src1->f[0] );
+ dst->f[1] = powf( src0->f[1], src1->f[1] );
+ dst->f[2] = powf( src0->f[2], src1->f[2] );
+ dst->f[3] = powf( src0->f[3], src1->f[3] );
}
static void
@@ -775,10 +775,10 @@ micro_rnd(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
- dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
- dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
- dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
+ dst->f[0] = floorf( src->f[0] + 0.5f );
+ dst->f[1] = floorf( src->f[1] + 0.5f );
+ dst->f[2] = floorf( src->f[2] + 0.5f );
+ dst->f[3] = floorf( src->f[3] + 0.5f );
}
static void
@@ -833,20 +833,20 @@ micro_sin(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) sin( (double) src->f[0] );
- dst->f[1] = (float) sin( (double) src->f[1] );
- dst->f[2] = (float) sin( (double) src->f[2] );
- dst->f[3] = (float) sin( (double) src->f[3] );
+ dst->f[0] = sinf( src->f[0] );
+ dst->f[1] = sinf( src->f[1] );
+ dst->f[2] = sinf( src->f[2] );
+ dst->f[3] = sinf( src->f[3] );
}
static void
micro_sqrt( union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
- dst->f[0] = (float) sqrt( (double) src->f[0] );
- dst->f[1] = (float) sqrt( (double) src->f[1] );
- dst->f[2] = (float) sqrt( (double) src->f[2] );
- dst->f[3] = (float) sqrt( (double) src->f[3] );
+ dst->f[0] = sqrtf( src->f[0] );
+ dst->f[1] = sqrtf( src->f[1] );
+ dst->f[2] = sqrtf( src->f[2] );
+ dst->f[3] = sqrtf( src->f[3] );
}
static void
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 45c49dd007c..92e2e5e9859 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -166,7 +166,7 @@ struct tgsi_exec_machine
float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
unsigned ImmLimit;
- float (*Consts)[4];
+ const float (*Consts)[4];
struct tgsi_exec_vector *Inputs;
struct tgsi_exec_vector *Outputs;
const struct tgsi_token *Tokens;
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index c37e201b2bc..c3295a27fff 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,113 +36,8 @@
#if defined(__i386__) || defined(__386__)
-#define DUMP_SSE 0
+#define HIGH_PRECISION 1 /* for 1/sqrt() */
-#if DUMP_SSE
-
-static void
-_print_reg(
- struct x86_reg reg )
-{
- if (reg.mod != mod_REG)
- debug_printf( "[" );
-
- switch( reg.file ) {
- case file_REG32:
- switch( reg.idx ) {
- case reg_AX:
- debug_printf( "EAX" );
- break;
- case reg_CX:
- debug_printf( "ECX" );
- break;
- case reg_DX:
- debug_printf( "EDX" );
- break;
- case reg_BX:
- debug_printf( "EBX" );
- break;
- case reg_SP:
- debug_printf( "ESP" );
- break;
- case reg_BP:
- debug_printf( "EBP" );
- break;
- case reg_SI:
- debug_printf( "ESI" );
- break;
- case reg_DI:
- debug_printf( "EDI" );
- break;
- }
- break;
- case file_MMX:
- assert( 0 );
- break;
- case file_XMM:
- debug_printf( "XMM%u", reg.idx );
- break;
- case file_x87:
- assert( 0 );
- break;
- }
-
- if (reg.mod == mod_DISP8 ||
- reg.mod == mod_DISP32)
- debug_printf("+%d", reg.disp);
-
- if (reg.mod != mod_REG)
- debug_printf( "]" );
-}
-
-static void
-_fill(
- const char *op )
-{
- unsigned count = 10 - strlen( op );
-
- while( count-- ) {
- debug_printf( " " );
- }
-}
-
-#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
-#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
-#define DUMP( OP ) debug_printf( "\n%s", OP )
-#define DUMP_I( OP, I ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- debug_printf( "%u", I ); } while( 0 )
-#define DUMP_R( OP, R0 ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 ); } while( 0 )
-#define DUMP_RR( OP, R0, R1 ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 );\
- debug_printf( ", " );\
- _print_reg( R1 ); } while( 0 )
-#define DUMP_RRI( OP, R0, R1, I ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 );\
- debug_printf( ", " );\
- _print_reg( R1 );\
- debug_printf( ", " );\
- debug_printf( "%u", I ); } while( 0 )
-
-#else
-
-#define DUMP_START()
-#define DUMP_END()
-#define DUMP( OP )
-#define DUMP_I( OP, I )
-#define DUMP_R( OP, R0 )
-#define DUMP_RR( OP, R0, R1 )
-#define DUMP_RRI( OP, R0, R1, I )
-
-#endif
#define FOR_EACH_CHANNEL( CHAN )\
for( CHAN = 0; CHAN < 4; CHAN++ )
@@ -308,200 +203,6 @@ get_coef(
((vec * 3 + member) * 4 + chan) * 4 );
}
-/**
- * X86 rtasm wrappers.
- */
-
-static void
-emit_addps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ADDPS", dst, src );
- sse_addps( func, dst, src );
-}
-
-static void
-emit_andnps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ANDNPS", dst, src );
- sse_andnps( func, dst, src );
-}
-
-static void
-emit_andps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ANDPS", dst, src );
- sse_andps( func, dst, src );
-}
-
-static void
-emit_call(
- struct x86_function *func,
- void (* addr)() )
-{
- struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
- DUMP_I( "CALL", addr );
- x86_mov_reg_imm( func, ecx, (unsigned long) addr );
- x86_call( func, ecx );
-}
-
-static void
-emit_cmpps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src,
- enum sse_cc cc )
-{
- DUMP_RRI( "CMPPS", dst, src, cc );
- sse_cmpps( func, dst, src, cc );
-}
-
-static void
-emit_cvttps2dq(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "CVTTPS2DQ", dst, src );
- sse2_cvttps2dq( func, dst, src );
-}
-
-static void
-emit_maxps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MAXPS", dst, src );
- sse_maxps( func, dst, src );
-}
-
-static void
-emit_minps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MINPS", dst, src );
- sse_minps( func, dst, src );
-}
-
-static void
-emit_mov(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOV", dst, src );
- x86_mov( func, dst, src );
-}
-
-static void
-emit_movaps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVAPS", dst, src );
- sse_movaps( func, dst, src );
-}
-
-static void
-emit_movss(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVSS", dst, src );
- sse_movss( func, dst, src );
-}
-
-static void
-emit_movups(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVUPS", dst, src );
- sse_movups( func, dst, src );
-}
-
-static void
-emit_mulps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MULPS", dst, src );
- sse_mulps( func, dst, src );
-}
-
-static void
-emit_or(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "OR", dst, src );
- x86_or( func, dst, src );
-}
-
-static void
-emit_orps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ORPS", dst, src );
- sse_orps( func, dst, src );
-}
-
-static void
-emit_pmovmskb(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "PMOVMSKB", dst, src );
- sse_pmovmskb( func, dst, src );
-}
-
-static void
-emit_pop(
- struct x86_function *func,
- struct x86_reg dst )
-{
- DUMP_R( "POP", dst );
- x86_pop( func, dst );
-}
-
-static void
-emit_push(
- struct x86_function *func,
- struct x86_reg dst )
-{
- DUMP_R( "PUSH", dst );
- x86_push( func, dst );
-}
-
-static void
-emit_rcpps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "RCPPS", dst, src );
- sse2_rcpps( func, dst, src );
-}
#ifdef WIN32
static void
@@ -509,7 +210,6 @@ emit_retw(
struct x86_function *func,
unsigned size )
{
- DUMP_I( "RET", size );
x86_retw( func, size );
}
#else
@@ -517,51 +217,10 @@ static void
emit_ret(
struct x86_function *func )
{
- DUMP( "RET" );
x86_ret( func );
}
#endif
-static void
-emit_rsqrtps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "RSQRTPS", dst, src );
- sse_rsqrtps( func, dst, src );
-}
-
-static void
-emit_shufps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src,
- unsigned char shuf )
-{
- DUMP_RRI( "SHUFPS", dst, src, shuf );
- sse_shufps( func, dst, src, shuf );
-}
-
-static void
-emit_subps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "SUBPS", dst, src );
- sse_subps( func, dst, src );
-}
-
-static void
-emit_xorps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "XORPS", dst, src );
- sse_xorps( func, dst, src );
-}
/**
* Data fetch helpers.
@@ -580,11 +239,11 @@ emit_const(
unsigned vec,
unsigned chan )
{
- emit_movss(
+ sse_movss(
func,
make_xmm( xmm ),
get_const( vec, chan ) );
- emit_shufps(
+ sse_shufps(
func,
make_xmm( xmm ),
make_xmm( xmm ),
@@ -598,11 +257,11 @@ emit_immediate(
unsigned vec,
unsigned chan )
{
- emit_movss(
+ sse_movss(
func,
make_xmm( xmm ),
get_immediate( vec, chan ) );
- emit_shufps(
+ sse_shufps(
func,
make_xmm( xmm ),
make_xmm( xmm ),
@@ -623,7 +282,7 @@ emit_inputf(
unsigned vec,
unsigned chan )
{
- emit_movups(
+ sse_movups(
func,
make_xmm( xmm ),
get_input( vec, chan ) );
@@ -642,7 +301,7 @@ emit_output(
unsigned vec,
unsigned chan )
{
- emit_movups(
+ sse_movups(
func,
get_output( vec, chan ),
make_xmm( xmm ) );
@@ -661,7 +320,7 @@ emit_tempf(
unsigned vec,
unsigned chan )
{
- emit_movaps(
+ sse_movaps(
func,
make_xmm( xmm ),
get_temp( vec, chan ) );
@@ -682,11 +341,11 @@ emit_coef(
unsigned chan,
unsigned member )
{
- emit_movss(
+ sse_movss(
func,
make_xmm( xmm ),
get_coef( vec, chan, member ) );
- emit_shufps(
+ sse_shufps(
func,
make_xmm( xmm ),
make_xmm( xmm ),
@@ -704,7 +363,7 @@ emit_inputs(
unsigned vec,
unsigned chan )
{
- emit_movups(
+ sse_movups(
func,
get_input( vec, chan ),
make_xmm( xmm ) );
@@ -717,7 +376,7 @@ emit_temps(
unsigned vec,
unsigned chan )
{
- emit_movaps(
+ sse_movaps(
func,
get_temp( vec, chan ),
make_xmm( xmm ) );
@@ -794,39 +453,39 @@ static void
emit_push_gp(
struct x86_function *func )
{
- emit_push(
+ x86_push(
func,
get_const_base() );
- emit_push(
+ x86_push(
func,
get_input_base() );
- emit_push(
+ x86_push(
func,
get_output_base() );
/* It is important on non-win32 platforms that temp base is pushed last.
*/
- emit_push(
+ x86_push(
func,
get_temp_base() );
}
static void
-emit_pop_gp(
+x86_pop_gp(
struct x86_function *func )
{
/* Restore GP registers in a reverse order.
*/
- emit_pop(
+ x86_pop(
func,
get_temp_base() );
- emit_pop(
+ x86_pop(
func,
get_output_base() );
- emit_pop(
+ x86_pop(
func,
get_input_base() );
- emit_pop(
+ x86_pop(
func,
get_const_base() );
}
@@ -837,7 +496,7 @@ emit_func_call_dst(
unsigned xmm_dst,
void (*code)() )
{
- emit_movaps(
+ sse_movaps(
func,
get_temp( TEMP_R0, 0 ),
make_xmm( xmm_dst ) );
@@ -846,19 +505,22 @@ emit_func_call_dst(
func );
#ifdef WIN32
- emit_push(
+ x86_push(
func,
get_temp( TEMP_R0, 0 ) );
#endif
- emit_call(
- func,
- code );
+ {
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+
+ x86_mov_reg_imm( func, ecx, (unsigned long) code );
+ x86_call( func, ecx );
+ }
- emit_pop_gp(
+ x86_pop_gp(
func );
- emit_movaps(
+ sse_movaps(
func,
make_xmm( xmm_dst ),
get_temp( TEMP_R0, 0 ) );
@@ -871,7 +533,7 @@ emit_func_call_dst_src(
unsigned xmm_src,
void (*code)() )
{
- emit_movaps(
+ sse_movaps(
func,
get_temp( TEMP_R0, 1 ),
make_xmm( xmm_src ) );
@@ -891,7 +553,7 @@ emit_abs(
struct x86_function *func,
unsigned xmm )
{
- emit_andps(
+ sse_andps(
func,
make_xmm( xmm ),
get_temp(
@@ -905,7 +567,7 @@ emit_add(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_addps(
+ sse_addps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -916,17 +578,15 @@ cos4f(
float *store )
{
#ifdef WIN32
- store[0] = (float) cos( (double) store[0] );
- store[1] = (float) cos( (double) store[1] );
- store[2] = (float) cos( (double) store[2] );
- store[3] = (float) cos( (double) store[3] );
+ const unsigned X = 0;
#else
const unsigned X = TEMP_R0 * 16;
+#endif
+
store[X + 0] = cosf( store[X + 0] );
store[X + 1] = cosf( store[X + 1] );
store[X + 2] = cosf( store[X + 2] );
store[X + 3] = cosf( store[X + 3] );
-#endif
}
static void
@@ -945,17 +605,14 @@ ex24f(
float *store )
{
#ifdef WIN32
- store[0] = (float) pow( 2.0, (double) store[0] );
- store[1] = (float) pow( 2.0, (double) store[1] );
- store[2] = (float) pow( 2.0, (double) store[2] );
- store[3] = (float) pow( 2.0, (double) store[3] );
+ const unsigned X = 0;
#else
const unsigned X = TEMP_R0 * 16;
+#endif
store[X + 0] = powf( 2.0f, store[X + 0] );
store[X + 1] = powf( 2.0f, store[X + 1] );
store[X + 2] = powf( 2.0f, store[X + 2] );
store[X + 3] = powf( 2.0f, store[X + 3] );
-#endif
}
static void
@@ -974,7 +631,7 @@ emit_f2it(
struct x86_function *func,
unsigned xmm )
{
- emit_cvttps2dq(
+ sse2_cvttps2dq(
func,
make_xmm( xmm ),
make_xmm( xmm ) );
@@ -989,10 +646,10 @@ flr4f(
#else
const unsigned X = TEMP_R0 * 16;
#endif
- store[X + 0] = (float) floor( (double) store[X + 0] );
- store[X + 1] = (float) floor( (double) store[X + 1] );
- store[X + 2] = (float) floor( (double) store[X + 2] );
- store[X + 3] = (float) floor( (double) store[X + 3] );
+ store[X + 0] = floorf( store[X + 0] );
+ store[X + 1] = floorf( store[X + 1] );
+ store[X + 2] = floorf( store[X + 2] );
+ store[X + 3] = floorf( store[X + 3] );
}
static void
@@ -1015,10 +672,10 @@ frc4f(
#else
const unsigned X = TEMP_R0 * 16;
#endif
- store[X + 0] -= (float) floor( (double) store[X + 0] );
- store[X + 1] -= (float) floor( (double) store[X + 1] );
- store[X + 2] -= (float) floor( (double) store[X + 2] );
- store[X + 3] -= (float) floor( (double) store[X + 3] );
+ store[X + 0] -= floorf( store[X + 0] );
+ store[X + 1] -= floorf( store[X + 1] );
+ store[X + 2] -= floorf( store[X + 2] );
+ store[X + 3] -= floorf( store[X + 3] );
}
static void
@@ -1064,7 +721,7 @@ emit_MOV(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_movups(
+ sse_movups(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1075,7 +732,7 @@ emit_mul (struct x86_function *func,
unsigned xmm_dst,
unsigned xmm_src)
{
- emit_mulps(
+ sse_mulps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1086,7 +743,7 @@ emit_neg(
struct x86_function *func,
unsigned xmm )
{
- emit_xorps(
+ sse_xorps(
func,
make_xmm( xmm ),
get_temp(
@@ -1099,17 +756,14 @@ pow4f(
float *store )
{
#ifdef WIN32
- store[0] = (float) pow( (double) store[0], (double) store[4] );
- store[1] = (float) pow( (double) store[1], (double) store[5] );
- store[2] = (float) pow( (double) store[2], (double) store[6] );
- store[3] = (float) pow( (double) store[3], (double) store[7] );
+ const unsigned X = 0;
#else
const unsigned X = TEMP_R0 * 16;
+#endif
store[X + 0] = powf( store[X + 0], store[X + 4] );
store[X + 1] = powf( store[X + 1], store[X + 5] );
store[X + 2] = powf( store[X + 2], store[X + 6] );
store[X + 3] = powf( store[X + 3], store[X + 7] );
-#endif
}
static void
@@ -1131,22 +785,80 @@ emit_rcp (
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_rcpps(
+ /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+ * good enough. Need to either emit a proper divide or use the
+ * iterative technique described below in emit_rsqrt().
+ */
+ sse2_rcpps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
}
+#if HIGH_PRECISION
+static void XSTDCALL
+rsqrt4f(
+ float *store )
+{
+#ifdef WIN32
+ const unsigned X = 0;
+#else
+ const unsigned X = TEMP_R0 * 16;
+#endif
+ store[X + 0] = 1.0F / sqrtf( store[X + 0] );
+ store[X + 1] = 1.0F / sqrtf( store[X + 1] );
+ store[X + 2] = 1.0F / sqrtf( store[X + 2] );
+ store[X + 3] = 1.0F / sqrtf( store[X + 3] );
+}
+#endif
+
static void
emit_rsqrt(
struct x86_function *func,
unsigned xmm_dst,
unsigned xmm_src )
{
+#if HIGH_PRECISION
+#if 1
+ emit_func_call_dst_src(
+ func,
+ xmm_dst,
+ xmm_src,
+ rsqrt4f );
+#else
+ /* Although rsqrtps() and rcpps() are low precision on some/all SSE
+ * implementations, it is possible to improve their precision at
+ * fairly low cost, using a Newton-Raphson step, as below:
+ *
+ * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+ * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+ *
+ * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
+ /* This is some code that would do the above for a scalar 'a'. We
+ * obviously are interested in a vector version:
+ *
+ * movss xmm3, a;
+ * movss xmm1, half;
+ * movss xmm2, three;
+ * rsqrtss xmm0, xmm3;
+ * mulss xmm3, xmm0;
+ * mulss xmm1, xmm0;
+ * mulss xmm3, xmm0;
+ * subss xmm2, xmm3;
+ * mulss xmm1, xmm2;
+ * movss x, xmm1;
+ */
+#endif
+#else
+ /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+ * good enough.
+ */
emit_rsqrtps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
+#endif
}
static void
@@ -1154,7 +866,7 @@ emit_setsign(
struct x86_function *func,
unsigned xmm )
{
- emit_orps(
+ sse_orps(
func,
make_xmm( xmm ),
get_temp(
@@ -1167,17 +879,14 @@ sin4f(
float *store )
{
#ifdef WIN32
- store[0] = (float) sin( (double) store[0] );
- store[1] = (float) sin( (double) store[1] );
- store[2] = (float) sin( (double) store[2] );
- store[3] = (float) sin( (double) store[3] );
+ const unsigned X = 0;
#else
const unsigned X = TEMP_R0 * 16;
+#endif
store[X + 0] = sinf( store[X + 0] );
store[X + 1] = sinf( store[X + 1] );
store[X + 2] = sinf( store[X + 2] );
store[X + 3] = sinf( store[X + 3] );
-#endif
}
static void
@@ -1196,7 +905,7 @@ emit_sub(
unsigned xmm_dst,
unsigned xmm_src )
{
- emit_subps(
+ sse_subps(
func,
make_xmm( xmm_dst ),
make_xmm( xmm_src ) );
@@ -1405,16 +1114,16 @@ emit_kil(
}
}
- emit_push(
+ x86_push(
func,
x86_make_reg( file_REG32, reg_AX ) );
- emit_push(
+ x86_push(
func,
x86_make_reg( file_REG32, reg_DX ) );
FOR_EACH_CHANNEL( chan_index ) {
if( uniquemask & (1 << chan_index) ) {
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( registers[chan_index] ),
get_temp(
@@ -1423,17 +1132,17 @@ emit_kil(
cc_LessThan );
if( chan_index == firstchan ) {
- emit_pmovmskb(
+ sse_pmovmskb(
func,
x86_make_reg( file_REG32, reg_AX ),
make_xmm( registers[chan_index] ) );
}
else {
- emit_pmovmskb(
+ sse_pmovmskb(
func,
x86_make_reg( file_REG32, reg_DX ),
make_xmm( registers[chan_index] ) );
- emit_or(
+ x86_or(
func,
x86_make_reg( file_REG32, reg_AX ),
x86_make_reg( file_REG32, reg_DX ) );
@@ -1441,17 +1150,17 @@ emit_kil(
}
}
- emit_or(
+ x86_or(
func,
get_temp(
TGSI_EXEC_TEMP_KILMASK_I,
TGSI_EXEC_TEMP_KILMASK_C ),
x86_make_reg( file_REG32, reg_AX ) );
- emit_pop(
+ x86_pop(
func,
x86_make_reg( file_REG32, reg_DX ) );
- emit_pop(
+ x86_pop(
func,
x86_make_reg( file_REG32, reg_AX ) );
}
@@ -1467,12 +1176,12 @@ emit_setcc(
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( 0 ),
make_xmm( 1 ),
cc );
- emit_andps(
+ sse_andps(
func,
make_xmm( 0 ),
get_temp(
@@ -1493,22 +1202,22 @@ emit_cmp(
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
FETCH( func, *inst, 2, 2, chan_index );
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( 0 ),
get_temp(
TGSI_EXEC_TEMP_00000000_I,
TGSI_EXEC_TEMP_00000000_C ),
cc_LessThan );
- emit_andps(
+ sse_andps(
func,
make_xmm( 1 ),
make_xmm( 0 ) );
- emit_andnps(
+ sse_andnps(
func,
make_xmm( 0 ),
make_xmm( 2 ) );
- emit_orps(
+ sse_orps(
func,
make_xmm( 0 ),
make_xmm( 1 ) );
@@ -1559,7 +1268,7 @@ emit_instruction(
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_maxps(
+ sse_maxps(
func,
make_xmm( 0 ),
get_temp(
@@ -1568,21 +1277,26 @@ emit_instruction(
STORE( func, *inst, 0, 0, CHAN_Y );
}
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ /* XMM[1] = SrcReg[0].yyyy */
FETCH( func, *inst, 1, 0, CHAN_Y );
- emit_maxps(
+ /* XMM[1] = max(XMM[1], 0) */
+ sse_maxps(
func,
make_xmm( 1 ),
get_temp(
TGSI_EXEC_TEMP_00000000_I,
TGSI_EXEC_TEMP_00000000_C ) );
+ /* XMM[2] = SrcReg[0].wwww */
FETCH( func, *inst, 2, 0, CHAN_W );
- emit_minps(
+ /* XMM[2] = min(XMM[2], 128.0) */
+ sse_minps(
func,
make_xmm( 2 ),
get_temp(
TGSI_EXEC_TEMP_128_I,
TGSI_EXEC_TEMP_128_C ) );
- emit_maxps(
+ /* XMM[2] = max(XMM[2], -128.0) */
+ sse_maxps(
func,
make_xmm( 2 ),
get_temp(
@@ -1590,16 +1304,16 @@ emit_instruction(
TGSI_EXEC_TEMP_MINUS_128_C ) );
emit_pow( func, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_xorps(
+ sse_xorps(
func,
make_xmm( 2 ),
make_xmm( 2 ) );
- emit_cmpps(
+ sse_cmpps(
func,
make_xmm( 2 ),
make_xmm( 0 ),
cc_LessThanEqual );
- emit_andps(
+ sse_andps(
func,
make_xmm( 2 ),
make_xmm( 1 ) );
@@ -1721,7 +1435,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
- emit_minps(
+ sse_minps(
func,
make_xmm( 0 ),
make_xmm( 1 ) );
@@ -1733,7 +1447,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
- emit_maxps(
+ sse_maxps(
func,
make_xmm( 0 ),
make_xmm( 1 ) );
@@ -2332,7 +2046,7 @@ emit_declaration(
*/
unsigned
tgsi_emit_sse2(
- struct tgsi_token *tokens,
+ const struct tgsi_token *tokens,
struct x86_function *func,
float (*immediates)[4])
{
@@ -2341,8 +2055,6 @@ tgsi_emit_sse2(
unsigned ok = 1;
uint num_immediates = 0;
- DUMP_START();
-
func->csr = func->store;
tgsi_parse_init( &parse, tokens );
@@ -2352,24 +2064,24 @@ tgsi_emit_sse2(
*/
if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
/* DECLARATION phase, do not load output argument. */
- emit_mov(
+ x86_mov(
func,
get_input_base(),
get_argument( 0 ) );
/* skipping outputs argument here */
- emit_mov(
+ x86_mov(
func,
get_const_base(),
get_argument( 2 ) );
- emit_mov(
+ x86_mov(
func,
get_temp_base(),
get_argument( 3 ) );
- emit_mov(
+ x86_mov(
func,
get_coef_base(),
get_argument( 4 ) );
- emit_mov(
+ x86_mov(
func,
get_immediate_base(),
get_argument( 5 ) );
@@ -2377,23 +2089,23 @@ tgsi_emit_sse2(
else {
assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
- emit_mov(
+ x86_mov(
func,
get_input_base(),
get_argument( 0 ) );
- emit_mov(
+ x86_mov(
func,
get_output_base(),
get_argument( 1 ) );
- emit_mov(
+ x86_mov(
func,
get_const_base(),
get_argument( 2 ) );
- emit_mov(
+ x86_mov(
func,
get_temp_base(),
get_argument( 3 ) );
- emit_mov(
+ x86_mov(
func,
get_immediate_base(),
get_argument( 4 ) );
@@ -2416,7 +2128,7 @@ tgsi_emit_sse2(
if( !instruction_phase ) {
/* INSTRUCTION phase, overwrite coeff with output. */
instruction_phase = TRUE;
- emit_mov(
+ x86_mov(
func,
get_output_base(),
get_argument( 1 ) );
@@ -2428,8 +2140,10 @@ tgsi_emit_sse2(
&parse.FullToken.FullInstruction );
if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to SSE\n",
- parse.FullToken.FullInstruction.Instruction.Opcode );
+ debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
+ parse.FullToken.FullInstruction.Instruction.Opcode,
+ parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+ "vertex shader" : "fragment shader");
}
break;
@@ -2464,8 +2178,6 @@ tgsi_emit_sse2(
tgsi_parse_free( &parse );
- DUMP_END();
-
return ok;
}
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index d56bf7f98ab..063287dc5e9 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -10,7 +10,7 @@ struct x86_function;
unsigned
tgsi_emit_sse2(
- struct tgsi_token *tokens,
+ const struct tgsi_token *tokens,
struct x86_function *function,
float (*immediates)[4]
);
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index ff6a2c4194d..26bfc2051f2 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -25,8 +25,6 @@
*
**************************************************************************/
-#include <stdio.h>
-
#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
@@ -35,196 +33,28 @@
#include "tgsi_parse.h"
#include "tgsi_build.h"
-struct gen_dump
-{
- unsigned tabs;
- void (* write)(
- struct gen_dump *dump,
- const void *data,
- unsigned size );
-};
-
-struct text_dump
-{
- struct gen_dump base;
- char *text;
- unsigned length;
- unsigned capacity;
-};
-
-static void
-_text_dump_write(
- struct gen_dump *dump,
- const void *data,
- unsigned size )
-{
- struct text_dump *td = (struct text_dump *) dump;
- unsigned new_length = td->length + size;
-
- if( new_length >= td->capacity ) {
- unsigned new_capacity = td->capacity;
-
- do {
- if( new_capacity == 0 ) {
- new_capacity = 256;
- }
- else {
- new_capacity *= 2;
- }
- } while( new_length >= new_capacity );
- td->text = (char *) REALLOC(
- td->text,
- td->capacity,
- new_capacity );
- td->capacity = new_capacity;
- }
- memcpy(
- &td->text[td->length],
- data,
- size );
- td->length = new_length;
- td->text[td->length] = '\0';
-}
-
-struct file_dump
-{
- struct gen_dump base;
- FILE *file;
-};
-
-static void
-_file_dump_write(
- struct gen_dump *dump,
- const void *data,
- unsigned size )
-{
- struct file_dump *fd = (struct file_dump *) dump;
-
-#if 0
- fwrite( data, 1, size, fd->file );
-#else
- {
- unsigned i;
-
- for (i = 0; i < size; i++ ) {
- fprintf( fd->file, "%c", ((const char *) data)[i] );
- }
- }
-#endif
-}
-
-static void
-gen_dump_str(
- struct gen_dump *dump,
- const char *str )
-{
- unsigned i;
- size_t len = strlen( str );
-
- for (i = 0; i < len; i++) {
- dump->write( dump, &str[i], 1 );
- if (str[i] == '\n') {
- unsigned i;
-
- for (i = 0; i < dump->tabs; i++) {
- dump->write( dump, " ", 4 );
- }
- }
- }
-}
-
-static void
-gen_dump_chr(
- struct gen_dump *dump,
- const char chr )
-{
- dump->write( dump, &chr, 1 );
-}
-
-static void
-gen_dump_uix(
- struct gen_dump *dump,
- const unsigned ui )
-{
- char str[36];
-
- util_snprintf( str, sizeof(str), "0x%x", ui );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_uid(
- struct gen_dump *dump,
- const unsigned ui )
-{
- char str[16];
-
- util_snprintf( str, sizeof(str), "%u", ui );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_sid(
- struct gen_dump *dump,
- const int si )
-{
- char str[16];
-
- util_snprintf( str, sizeof(str), "%d", si );
- gen_dump_str( dump, str );
-}
-
static void
-gen_dump_flt(
- struct gen_dump *dump,
- const float flt )
-{
- char str[48];
-
- util_snprintf( str, sizeof(str), "%10.4f", flt );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_enum(
- struct gen_dump *dump,
+dump_enum(
const unsigned e,
const char **enums,
const unsigned enums_count )
{
if (e >= enums_count) {
- gen_dump_uid( dump, e );
+ debug_printf( "%u", e );
}
else {
- gen_dump_str( dump, enums[e] );
+ debug_printf( "%s", enums[e] );
}
}
-static void
-gen_dump_tab(
- struct gen_dump *dump )
-{
- ++dump->tabs;
-}
-
-static void
-gen_dump_untab(
- struct gen_dump *dump )
-{
- assert( dump->tabs > 0 );
-
- --dump->tabs;
-}
-
-#define TXT(S) gen_dump_str( dump, S )
-#define CHR(C) gen_dump_chr( dump, C )
-#define UIX(I) gen_dump_uix( dump, I )
-#define UID(I) gen_dump_uid( dump, I )
-#define SID(I) gen_dump_sid( dump, I )
-#define FLT(F) gen_dump_flt( dump, F )
-#define TAB() gen_dump_tab( dump )
-#define UNT() gen_dump_untab( dump )
-#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+#define EOL() debug_printf( "\n" )
+#define TXT(S) debug_printf( "%s", S )
+#define CHR(C) debug_printf( "%c", C )
+#define UIX(I) debug_printf( "0x%x", I )
+#define UID(I) debug_printf( "%u", I )
+#define SID(I) debug_printf( "%d", I )
+#define FLT(F) debug_printf( "%10.4f", F )
+#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
static const char *TGSI_PROCESSOR_TYPES[] =
{
@@ -711,7 +541,6 @@ static const char *TGSI_MODULATES[] =
static void
dump_declaration_short(
- struct gen_dump *dump,
struct tgsi_full_declaration *decl )
{
TXT( "\nDCL " );
@@ -765,7 +594,6 @@ dump_declaration_short(
static void
dump_declaration_verbose(
- struct gen_dump *dump,
struct tgsi_full_declaration *decl,
unsigned ignored,
unsigned deflt,
@@ -803,7 +631,7 @@ dump_declaration_verbose(
UIX( decl->Declaration.Padding );
}
- CHR( '\n' );
+ EOL();
switch( decl->Declaration.Declare ) {
case TGSI_DECLARE_RANGE:
TXT( "\nFirst: " );
@@ -822,7 +650,7 @@ dump_declaration_verbose(
}
if( decl->Declaration.Interpolate ) {
- CHR( '\n' );
+ EOL();
TXT( "\nInterpolate: " );
ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
if( ignored ) {
@@ -832,7 +660,7 @@ dump_declaration_verbose(
}
if( decl->Declaration.Semantic ) {
- CHR( '\n' );
+ EOL();
TXT( "\nSemanticName : " );
ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
TXT( "\nSemanticIndex: " );
@@ -846,7 +674,6 @@ dump_declaration_verbose(
static void
dump_immediate_short(
- struct gen_dump *dump,
struct tgsi_full_immediate *imm )
{
unsigned i;
@@ -874,7 +701,6 @@ dump_immediate_short(
static void
dump_immediate_verbose(
- struct gen_dump *dump,
struct tgsi_full_immediate *imm,
unsigned ignored )
{
@@ -888,7 +714,7 @@ dump_immediate_verbose(
}
for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- CHR( '\n' );
+ EOL();
switch( imm->Immediate.DataType ) {
case TGSI_IMM_FLOAT32:
TXT( "\nFloat: " );
@@ -903,14 +729,13 @@ dump_immediate_verbose(
static void
dump_instruction_short(
- struct gen_dump *dump,
struct tgsi_full_instruction *inst,
unsigned instno )
{
unsigned i;
boolean first_reg = TRUE;
- CHR( '\n' );
+ EOL();
UID( instno );
CHR( ':' );
ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
@@ -1042,7 +867,6 @@ dump_instruction_short(
static void
dump_instruction_verbose(
- struct gen_dump *dump,
struct tgsi_full_instruction *inst,
unsigned ignored,
unsigned deflt,
@@ -1070,7 +894,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
@@ -1124,7 +948,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
@@ -1142,7 +966,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
@@ -1163,7 +987,7 @@ dump_instruction_verbose(
struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
- CHR( '\n' );
+ EOL();
TXT( "\nFile : " );
ENM( dst->DstRegister.File, TGSI_FILES );
if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
@@ -1194,7 +1018,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
@@ -1232,7 +1056,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
@@ -1254,7 +1078,7 @@ dump_instruction_verbose(
struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
- CHR( '\n' );
+ EOL();
TXT( "\nFile : ");
ENM( src->SrcRegister.File, TGSI_FILES );
if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
@@ -1299,7 +1123,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
@@ -1345,7 +1169,7 @@ dump_instruction_verbose(
}
if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
- CHR( '\n' );
+ EOL();
TXT( "\nType : " );
ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
@@ -1380,9 +1204,8 @@ dump_instruction_verbose(
}
}
-static void
-dump_gen(
- struct gen_dump *dump,
+void
+tgsi_dump(
const struct tgsi_token *tokens,
unsigned flags )
{
@@ -1394,16 +1217,16 @@ dump_gen(
unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
unsigned instno = 0;
- dump->tabs = 0;
-
- /* sanity check */
+ /* sanity checks */
assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+ assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
tgsi_parse_init( &parse, tokens );
TXT( "tgsi-dump begin -----------------" );
- CHR( '\n' );
+ EOL();
ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
UID( parse.FullVersion.Version.MajorVersion );
CHR( '.' );
@@ -1414,7 +1237,7 @@ dump_gen(
UID( parse.FullVersion.Version.MajorVersion );
TXT( "\nMinorVersion: " );
UID( parse.FullVersion.Version.MinorVersion );
- CHR( '\n' );
+ EOL();
TXT( "\nHeaderSize: " );
UID( parse.FullHeader.Header.HeaderSize );
@@ -1422,7 +1245,7 @@ dump_gen(
UID( parse.FullHeader.Header.BodySize );
TXT( "\nProcessor : " );
ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
- CHR( '\n' );
+ EOL();
}
fi = tgsi_default_full_instruction();
@@ -1434,19 +1257,16 @@ dump_gen(
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
dump_declaration_short(
- dump,
&parse.FullToken.FullDeclaration );
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
dump_immediate_short(
- dump,
&parse.FullToken.FullImmediate );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
dump_instruction_short(
- dump,
&parse.FullToken.FullInstruction,
instno );
instno++;
@@ -1471,7 +1291,6 @@ dump_gen(
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
dump_declaration_verbose(
- dump,
&parse.FullToken.FullDeclaration,
ignored,
deflt,
@@ -1480,14 +1299,12 @@ dump_gen(
case TGSI_TOKEN_TYPE_IMMEDIATE:
dump_immediate_verbose(
- dump,
&parse.FullToken.FullImmediate,
ignored );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
dump_instruction_verbose(
- dump,
&parse.FullToken.FullInstruction,
ignored,
deflt,
@@ -1498,7 +1315,7 @@ dump_gen(
assert( 0 );
}
- CHR( '\n' );
+ EOL();
}
}
@@ -1506,86 +1323,3 @@ dump_gen(
tgsi_parse_free( &parse );
}
-
-
-static void
-sanity_checks(void)
-{
- assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
- assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
-}
-
-
-void
-tgsi_dump(
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct file_dump dump;
-
- sanity_checks();
-
- dump.base.write = _file_dump_write;
-#if 0
- {
- static unsigned counter = 0;
- char buffer[64];
- sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ );
- dump.file = fopen( buffer, "wt" );
- }
-#else
- dump.file = stderr;
-#endif
-
- dump_gen(
- &dump.base,
- tokens,
- flags );
-
-#if 0
- fclose( dump.file );
-#endif
-}
-
-void
-tgsi_dump_str(
- char **str,
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct text_dump dump;
-
- dump.base.write = _text_dump_write;
- dump.text = NULL;
- dump.length = 0;
- dump.capacity = 0;
-
- dump_gen(
- &dump.base,
- tokens,
- flags );
-
- *str = dump.text;
-}
-
-
-void tgsi_debug_dump( struct tgsi_token *tokens )
-{
- char *str, *p;
-
- tgsi_dump_str( &str, tokens, 0 );
-
- p = str;
- while (p != NULL)
- {
- char *end = strchr( p, '\n' );
- if (end != NULL)
- {
- *end++ = '\0';
- }
- debug_printf( "%s\n", p );
- p = end;
- }
-
- FREE( str );
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index 51d79a0362e..beb0155d56c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -14,16 +14,6 @@ tgsi_dump(
const struct tgsi_token *tokens,
unsigned flags );
-void
-tgsi_dump_str(
- char **str,
- const struct tgsi_token *tokens,
- unsigned flags );
-
-/* Dump to debug_printf()
- */
-void tgsi_debug_dump( struct tgsi_token *tokens );
-
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index a98e88e3437..da0121c482e 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -1,6 +1,8 @@
#if !defined TGSI_PARSE_H
#define TGSI_PARSE_H
+#include "pipe/p_shader_tokens.h"
+
#if defined __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile
new file mode 100644
index 00000000000..39dfb0de300
--- /dev/null
+++ b/src/gallium/auxiliary/translate/Makefile
@@ -0,0 +1,14 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = translate
+
+C_SOURCES = \
+ translate_generic.c \
+ translate_sse.c \
+ translate.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript
new file mode 100644
index 00000000000..7608908915f
--- /dev/null
+++ b/src/gallium/auxiliary/translate/SConscript
@@ -0,0 +1,11 @@
+Import('*')
+
+translate = env.ConvenienceLibrary(
+ target = 'translate',
+ source = [
+ 'translate_generic.c',
+ 'translate_sse.c',
+ 'translate.c',
+ ])
+
+auxiliaries.insert(0, translate)
diff --git a/src/gallium/auxiliary/draw/draw_debug.c b/src/gallium/auxiliary/translate/translate.c
index d6220b5f62c..b04bc6eefd7 100644
--- a/src/gallium/auxiliary/draw/draw_debug.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -30,84 +30,19 @@
* Keith Whitwell <[email protected]>
*/
-#include "draw_private.h"
-#include "draw_context.h"
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "translate.h"
-
-
-static void
-draw_prim_info(unsigned prim, unsigned *first, unsigned *incr)
-{
- assert(prim >= PIPE_PRIM_POINTS);
- assert(prim <= PIPE_PRIM_POLYGON);
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- *first = 1;
- *incr = 1;
- break;
- case PIPE_PRIM_LINES:
- *first = 2;
- *incr = 2;
- break;
- case PIPE_PRIM_LINE_STRIP:
- *first = 2;
- *incr = 1;
- break;
- case PIPE_PRIM_LINE_LOOP:
- *first = 2;
- *incr = 1;
- break;
- case PIPE_PRIM_TRIANGLES:
- *first = 3;
- *incr = 3;
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- *first = 3;
- *incr = 1;
- break;
- case PIPE_PRIM_TRIANGLE_FAN:
- case PIPE_PRIM_POLYGON:
- *first = 3;
- *incr = 1;
- break;
- case PIPE_PRIM_QUADS:
- *first = 4;
- *incr = 4;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- *first = 4;
- *incr = 2;
- break;
- default:
- assert(0);
- *first = 1;
- *incr = 1;
- break;
- }
-}
-
-
-unsigned
-draw_trim_prim( unsigned mode, unsigned count )
+struct translate *translate_create( const struct translate_key *key )
{
- unsigned length, first, incr;
+ struct translate *translate = NULL;
- draw_prim_info( mode, &first, &incr );
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ translate = translate_sse2_create( key );
+ if (translate)
+ return translate;
+#endif
- if (count < first)
- length = 0;
- else
- length = count - (count - first) % incr;
-
- return length;
-}
-
-
-boolean
-draw_validate_prim( unsigned mode, unsigned count )
-{
- return (count > 0 &&
- count == draw_trim_prim( mode, count ));
+ return translate_generic_create( key );
}
-
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
new file mode 100644
index 00000000000..d95d1ac4f3d
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2008 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * Vertex fetch/store/convert code. This functionality is used in two places:
+ * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
+ * arrays and convert to format needed by vertex shaders.
+ * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
+ * (which is the organization used throughout the draw/prim pipeline) to
+ * hardware-specific formats and emit into hardware vertex buffers.
+ *
+ *
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#ifndef _TRANSLATE_H
+#define _TRANSLATE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+
+struct translate_element
+{
+ enum pipe_format input_format;
+ unsigned input_buffer;
+ unsigned input_offset;
+
+ enum pipe_format output_format;
+ unsigned output_offset;
+};
+
+
+struct translate_key {
+ unsigned output_stride;
+ unsigned nr_elements;
+ struct translate_element element[PIPE_MAX_ATTRIBS];
+};
+
+
+/**
+ * A compiled vertex translation: converts vertices from the input layout
+ * described by 'key' to the output layout described by 'key'.
+ *
+ * The callback notes below describe what the generic backend in this
+ * patch does; other backends are presumably expected to match -- confirm.
+ */
+struct translate {
+ /* Copy of the key this object was created from. */
+ struct translate_key key;
+
+ /* Destroy the translate object. */
+ void (*release)( struct translate * );
+
+ /* Bind input buffer 'i': every element whose input_buffer equals 'i'
+ * will read from 'ptr' plus its input_offset, advancing by 'stride'
+ * per vertex.
+ */
+ void (*set_buffer)( struct translate *,
+ unsigned i,
+ const void *ptr,
+ unsigned stride );
+
+ /* Translate 'count' vertices picked by the index list 'elts' and
+ * write them consecutively (key.output_stride apart) to output_buffer.
+ */
+ void (*run_elts)( struct translate *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+ /* Translate the 'count' consecutive vertices beginning at 'start' and
+ * write them consecutively (key.output_stride apart) to output_buffer.
+ */
+ void (*run)( struct translate *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
+};
+
+
+
+#if 0
+struct translate_context *translate_context_create( void );
+void translate_context_destroy( struct translate_context * );
+
+struct translate *translate_lookup_or_create( struct translate_context *tctx,
+ const struct translate_key *key );
+#endif
+
+
+struct translate *translate_create( const struct translate_key *key );
+
+
+/*******************************************************************************
+ * Private:
+ */
+struct translate *translate_sse2_create( const struct translate_key *key );
+
+struct translate *translate_generic_create( const struct translate_key *key );
+
+
+#endif
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
new file mode 100644
index 00000000000..402780ee539
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -0,0 +1,676 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+
+#define DRAW_DBG 0
+
+typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*emit_func)(const float *attrib, void *ptr);
+
+
+
+struct translate_generic {
+ struct translate translate;
+
+ struct {
+ fetch_func fetch;
+ unsigned buffer;
+ unsigned input_offset;
+
+ emit_func emit;
+ unsigned output_offset;
+
+ char *input_ptr;
+ unsigned input_stride;
+
+ } attrib[PIPE_MAX_ATTRIBS];
+
+ unsigned nr_attrib;
+};
+
+
+static struct translate_generic *translate_generic( struct translate *translate )
+{
+ return (struct translate_generic *)translate;
+}
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/duplicated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \
+static void \
+fetch_##NAME(const void *ptr, float *attrib) \
+{ \
+ const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \
+ unsigned i; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ attrib[i] = FROM(i); \
+ } \
+ \
+ for (; i < 4; i++) { \
+ attrib[i] = defaults[i]; \
+ } \
+} \
+ \
+static void \
+emit_##NAME(const float *attrib, void *ptr) \
+{ \
+ unsigned i; \
+ TYPE *out = (TYPE *)ptr; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ out[i] = TO(attrib[i]); \
+ } \
+}
+
+
+/*
+ * Per-component conversion helpers plugged into ATTRIB().
+ *
+ * FROM_*(i) reads component 'i' through the 'ptr' variable that must be in
+ * scope at the expansion site and yields a float.  TO_*(x) converts the
+ * float 'x' to the storage type.  Nothing here clamps.
+ *
+ * The 8-bit signed variants say 'signed char' explicitly: the signedness of
+ * plain 'char' is implementation-defined, and where it is unsigned the
+ * SSCALED/SNORM fetches would never produce a negative value.
+ *
+ * The 32-bit normalized stores scale in double precision.  As floats,
+ * 4294967295.0f and 2147483647.0f round up to 2^32 and 2^31, so x == 1.0
+ * would produce a value that does not fit the destination integer type.
+ */
+#define FROM_64_FLOAT(i) ((float) ((const double *) ptr)[i])
+#define FROM_32_FLOAT(i) (((const float *) ptr)[i])
+
+#define FROM_8_USCALED(i) ((float) ((const unsigned char *) ptr)[i])
+#define FROM_16_USCALED(i) ((float) ((const unsigned short *) ptr)[i])
+#define FROM_32_USCALED(i) ((float) ((const unsigned int *) ptr)[i])
+
+#define FROM_8_SSCALED(i) ((float) ((const signed char *) ptr)[i])
+#define FROM_16_SSCALED(i) ((float) ((const short *) ptr)[i])
+#define FROM_32_SSCALED(i) ((float) ((const int *) ptr)[i])
+
+#define FROM_8_UNORM(i) ((float) ((const unsigned char *) ptr)[i] / 255.0f)
+#define FROM_16_UNORM(i) ((float) ((const unsigned short *) ptr)[i] / 65535.0f)
+#define FROM_32_UNORM(i) ((float) ((const unsigned int *) ptr)[i] / 4294967295.0f)
+
+#define FROM_8_SNORM(i) ((float) ((const signed char *) ptr)[i] / 127.0f)
+#define FROM_16_SNORM(i) ((float) ((const short *) ptr)[i] / 32767.0f)
+#define FROM_32_SNORM(i) ((float) ((const int *) ptr)[i] / 2147483647.0f)
+
+#define TO_64_FLOAT(x) ((double) (x))
+#define TO_32_FLOAT(x) (x)
+
+#define TO_8_USCALED(x) ((unsigned char) (x))
+#define TO_16_USCALED(x) ((unsigned short) (x))
+#define TO_32_USCALED(x) ((unsigned int) (x))
+
+#define TO_8_SSCALED(x) ((signed char) (x))
+#define TO_16_SSCALED(x) ((short) (x))
+#define TO_32_SSCALED(x) ((int) (x))
+
+#define TO_8_UNORM(x) ((unsigned char) ((x) * 255.0f))
+#define TO_16_UNORM(x) ((unsigned short) ((x) * 65535.0f))
+#define TO_32_UNORM(x) ((unsigned int) ((double) (x) * 4294967295.0))
+
+#define TO_8_SNORM(x) ((signed char) ((x) * 127.0f))
+#define TO_16_SNORM(x) ((short) ((x) * 32767.0f))
+#define TO_32_SNORM(x) ((int) ((double) (x) * 2147483647.0))
+
+
+
+ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT )
+
+ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT )
+
+ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED )
+
+ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED )
+
+ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM )
+
+ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM )
+
+ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED )
+
+ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED )
+
+ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM )
+
+ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM )
+
+ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED )
+
+ATTRIB( R8G8B8A8_SSCALED, 4, char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8G8B8_SSCALED, 3, char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8G8_SSCALED, 2, char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8_SSCALED, 1, char, FROM_8_SSCALED, TO_8_SSCALED )
+
+ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM )
+
+ATTRIB( R8G8B8A8_SNORM, 4, char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8G8B8_SNORM, 3, char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM )
+
+ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+
+
+
+static void
+fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+{
+ attrib[2] = FROM_8_UNORM(0);
+ attrib[1] = FROM_8_UNORM(1);
+ attrib[0] = FROM_8_UNORM(2);
+ attrib[3] = FROM_8_UNORM(3);
+}
+
+static void
+emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
+{
+ ubyte *out = (ubyte *)ptr;
+ out[2] = TO_8_UNORM(attrib[0]);
+ out[1] = TO_8_UNORM(attrib[1]);
+ out[0] = TO_8_UNORM(attrib[2]);
+ out[3] = TO_8_UNORM(attrib[3]);
+}
+
+static void
+fetch_NULL( const void *ptr, float *attrib )
+{
+ attrib[0] = 0;
+ attrib[1] = 0;
+ attrib[2] = 0;
+ attrib[3] = 1;
+}
+
+static void
+emit_NULL( const float *attrib, void *ptr )
+{
+ /* do nothing is the only sensible option */
+}
+
+static fetch_func get_fetch_func( enum pipe_format format )
+{
+ switch (format) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return fetch_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return fetch_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return fetch_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return fetch_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return fetch_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return fetch_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return fetch_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return fetch_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return fetch_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return fetch_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return fetch_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return fetch_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return fetch_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return fetch_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return fetch_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return fetch_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return fetch_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return fetch_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return fetch_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return fetch_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return fetch_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return fetch_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return fetch_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return fetch_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return fetch_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return fetch_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return fetch_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return fetch_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return fetch_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return fetch_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return fetch_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return fetch_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return fetch_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return fetch_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return fetch_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return fetch_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return fetch_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return fetch_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return fetch_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return fetch_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return fetch_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return fetch_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return fetch_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return fetch_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return fetch_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return fetch_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return fetch_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return fetch_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return fetch_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return fetch_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return fetch_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return fetch_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return fetch_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return fetch_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return fetch_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return fetch_R8G8B8A8_SSCALED;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return fetch_A8R8G8B8_UNORM;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return fetch_B8G8R8A8_UNORM;
+
+ default:
+ assert(0);
+ return fetch_NULL;
+ }
+}
+
+
+
+
+static emit_func get_emit_func( enum pipe_format format )
+{
+ switch (format) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return emit_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return emit_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return emit_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return emit_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return emit_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return emit_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return emit_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return emit_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return emit_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return emit_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return emit_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return emit_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return emit_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return emit_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return emit_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return emit_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return emit_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return emit_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return emit_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return emit_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return emit_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return emit_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return emit_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return emit_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return emit_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return emit_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return emit_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return emit_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return emit_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return emit_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return emit_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return emit_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return emit_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return emit_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return emit_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return emit_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return emit_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return emit_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return emit_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return emit_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return emit_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return emit_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return emit_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return emit_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return emit_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return emit_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return emit_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return emit_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return emit_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return emit_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return emit_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return emit_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return emit_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return emit_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return emit_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return emit_R8G8B8A8_SSCALED;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return emit_A8R8G8B8_UNORM;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return emit_B8G8R8A8_UNORM;
+
+ default:
+ assert(0);
+ return emit_NULL;
+ }
+}
+
+
+
+/**
+ * Translate 'count' vertices selected by the index list 'elts'.
+ *
+ * For each index, every configured attribute is fetched from
+ * input_ptr + input_stride * index into a float[4] and then emitted at
+ * the attribute's output_offset inside the current output vertex.
+ * Output vertices start at 'output_buffer' and are key.output_stride
+ * bytes apart.
+ */
+static void generic_run_elts( struct translate *translate,
+                              const unsigned *elts,
+                              unsigned count,
+                              void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   const unsigned nr_attrs = tg->nr_attrib;
+   char *vert = output_buffer;
+   unsigned attr;
+   unsigned i;
+
+   /* Outer loop: one output vertex per entry of the index list.
+    */
+   for (i = 0; i < count; i++) {
+      const unsigned elt = elts[i];
+
+      /* Inner loop: the vertex attributes (vertex shader inputs).
+       */
+      for (attr = 0; attr < nr_attrs; attr++) {
+         float data[4];
+         const char *src;
+         char *dst;
+
+         src = tg->attrib[attr].input_ptr + tg->attrib[attr].input_stride * elt;
+         dst = vert + tg->attrib[attr].output_offset;
+
+         tg->attrib[attr].fetch( src, data );
+
+         if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
+                             i, elt, attr, data[0], data[1], data[2], data[3]);
+
+         tg->attrib[attr].emit( data, dst );
+      }
+
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+
+
+/**
+ * Translate the 'count' consecutive vertices beginning at 'start'.
+ *
+ * Identical to generic_run_elts() except that the source vertex number
+ * is start + i instead of being read from an index list.
+ */
+static void generic_run( struct translate *translate,
+                         unsigned start,
+                         unsigned count,
+                         void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   const unsigned nr_attrs = tg->nr_attrib;
+   char *vert = output_buffer;
+   unsigned attr;
+   unsigned i;
+
+   /* Outer loop: one output vertex per input vertex in the range.
+    */
+   for (i = 0; i < count; i++) {
+      const unsigned elt = start + i;
+
+      /* Inner loop: the vertex attributes (vertex shader inputs).
+       */
+      for (attr = 0; attr < nr_attrs; attr++) {
+         float data[4];
+         const char *src;
+         char *dst;
+
+         src = tg->attrib[attr].input_ptr + tg->attrib[attr].input_stride * elt;
+         dst = vert + tg->attrib[attr].output_offset;
+
+         tg->attrib[attr].fetch( src, data );
+
+         if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
+                             i, attr, data[0], data[1], data[2], data[3]);
+
+         tg->attrib[attr].emit( data, dst );
+      }
+
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+
+
+/**
+ * Bind input buffer 'buf' to 'ptr' with the given per-vertex 'stride'.
+ *
+ * Every attribute that sources from 'buf' gets its input pointer set to
+ * 'ptr' plus the attribute's own input_offset; attributes that source
+ * from other buffers are left alone.
+ */
+static void generic_set_buffer( struct translate *translate,
+                                unsigned buf,
+                                const void *ptr,
+                                unsigned stride )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   unsigned a;
+
+   for (a = 0; a < tg->nr_attrib; a++) {
+      if (tg->attrib[a].buffer != buf)
+         continue;
+
+      tg->attrib[a].input_ptr = (char *)ptr + tg->attrib[a].input_offset;
+      tg->attrib[a].input_stride = stride;
+   }
+}
+
+
+static void generic_release( struct translate *translate )
+{
+ /* Refcount?
+ */
+ FREE(translate);
+}
+
+/**
+ * Create a translate object that uses the portable C fetch/emit routines.
+ *
+ * \param key  describes the input and output layout; copied into the
+ *             returned object.
+ * \return the new object, or NULL if allocation fails or the key names
+ *         more elements than the attribute table can hold.
+ */
+struct translate *translate_generic_create( const struct translate_key *key )
+{
+   struct translate_generic *tg;
+   unsigned i;
+
+   /* attrib[] and key->element[] both hold PIPE_MAX_ATTRIBS entries;
+    * refuse a key that would index past them.
+    */
+   if (key->nr_elements > PIPE_MAX_ATTRIBS)
+      return NULL;
+
+   tg = CALLOC_STRUCT(translate_generic);
+   if (tg == NULL)
+      return NULL;
+
+   tg->translate.key = *key;
+   tg->translate.release = generic_release;
+   tg->translate.set_buffer = generic_set_buffer;
+   tg->translate.run_elts = generic_run_elts;
+   tg->translate.run = generic_run;
+
+   /* Resolve the per-format conversion routines once, up front. */
+   for (i = 0; i < key->nr_elements; i++) {
+      tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
+      tg->attrib[i].buffer = key->element[i].input_buffer;
+      tg->attrib[i].input_offset = key->element[i].input_offset;
+
+      tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+      tg->attrib[i].output_offset = key->element[i].output_offset;
+   }
+
+   tg->nr_attrib = key->nr_elements;
+
+   return &tg->translate;
+}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
new file mode 100644
index 00000000000..f590d48b787
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_util.h"
+#include "util/u_simple_list.h"
+
+#include "translate.h"
+
+
+#if defined(__i386__) || defined(__386__) || defined(i386)
+
+#include "rtasm/rtasm_cpu.h"
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+#ifdef WIN32
+#define RTASM __cdecl
+#else
+#define RTASM
+#endif
+
+typedef void (RTASM *run_func)( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ void *output_buffer );
+
+typedef void (RTASM *run_elts_func)( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
+
+
+struct translate_sse {
+ struct translate translate;
+
+ struct x86_function linear_func;
+ struct x86_function elt_func;
+ struct x86_function *func;
+
+ boolean loaded_identity;
+ boolean loaded_255;
+ boolean loaded_inv_255;
+
+ float identity[4];
+ float float_255[4];
+ float inv_255[4];
+
+ struct {
+ char *input_ptr;
+ unsigned input_stride;
+ } attrib[PIPE_MAX_ATTRIBS];
+
+ run_func gen_run;
+ run_elts_func gen_run_elts;
+
+};
+
+static int get_offset( const void *a, const void *b )
+{
+ return (const char *)b - (const char *)a;
+}
+
+
+
+/*
+ * Return the XMM register (XMM6) that caches the constant {0, 0, 0, 1}.
+ *
+ * The first call after loaded_identity has been cleared fills
+ * p->identity and emits the movups that loads it; later calls only hand
+ * back the register.
+ */
+static struct x86_reg get_identity( struct translate_sse *p )
+{
+ struct x86_reg reg = x86_make_reg(file_XMM, 6);
+
+ if (!p->loaded_identity) {
+ /* Nasty: hardcodes that ESI holds the translate_sse pointer in the
+ * generated code, so the constant can be addressed as a displacement
+ * from it.
+ */
+ struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+
+ p->loaded_identity = TRUE;
+ p->identity[0] = 0;
+ p->identity[1] = 0;
+ p->identity[2] = 0;
+ p->identity[3] = 1;
+
+ sse_movups(p->func, reg,
+ x86_make_disp(translateESI,
+ get_offset(p, &p->identity[0])));
+ }
+
+ return reg;
+}
+
+/*
+ * Return the XMM register that caches the constant {255, 255, 255, 255}.
+ *
+ * The first call after loaded_255 has been cleared fills p->float_255
+ * and emits the movups that loads it; later calls only hand back the
+ * register.
+ */
+static struct x86_reg get_255( struct translate_sse *p )
+{
+   /* XMM7, not XMM6: get_identity() caches its constant in XMM6, and
+    * once both "loaded" flags are set neither load is emitted again, so
+    * sharing a register would leave one user reading the other's
+    * constant.
+    */
+   struct x86_reg reg = x86_make_reg(file_XMM, 7);
+
+   if (!p->loaded_255) {
+      /* Relies on ESI holding the translate_sse pointer in the
+       * generated code.
+       */
+      struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+
+      p->loaded_255 = TRUE;
+      p->float_255[0] =
+         p->float_255[1] =
+         p->float_255[2] =
+         p->float_255[3] = 255.0f;
+
+      sse_movups(p->func, reg,
+                 x86_make_disp(translateESI,
+                               get_offset(p, &p->float_255[0])));
+   }
+
+   return reg;
+}
+
+/*
+ * Return the XMM register (XMM5) that caches the constant 1/255 in all
+ * four lanes.
+ *
+ * The first call after loaded_inv_255 has been cleared fills p->inv_255
+ * and emits the movups that loads it; later calls only hand back the
+ * register.
+ */
+static struct x86_reg get_inv_255( struct translate_sse *p )
+{
+ struct x86_reg reg = x86_make_reg(file_XMM, 5);
+
+ if (!p->loaded_inv_255) {
+ /* Relies on ESI holding the translate_sse pointer in the generated
+ * code.
+ */
+ struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+
+ p->loaded_inv_255 = TRUE;
+ p->inv_255[0] =
+ p->inv_255[1] =
+ p->inv_255[2] =
+ p->inv_255[3] = 1.0f / 255.0f;
+
+ sse_movups(p->func, reg,
+ x86_make_disp(translateESI,
+ get_offset(p, &p->inv_255[0])));
+ }
+
+ return reg;
+}
+
+
+static void emit_load_R32G32B32A32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ sse_movups(p->func, data, arg0);
+}
+
+static void emit_load_R32G32B32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1
+ * 0 0 c 1
+ * a b c 1
+ */
+ sse_movss(p->func, data, x86_make_disp(arg0, 8));
+ sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
+ sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+ sse_movlps(p->func, data, arg0);
+}
+
+static void emit_load_R32G32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ /* 0 0 0 1
+ * a b 0 1
+ */
+ sse_movups(p->func, data, get_identity(p) );
+ sse_movlps(p->func, data, arg0);
+}
+
+
+static void emit_load_R32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0 )
+{
+ /* a 0 0 0
+ * a 0 0 1
+ */
+ sse_movss(p->func, data, arg0);
+ sse_orps(p->func, data, get_identity(p) );
+}
+
+
+/*
+ * Emit code that loads four unsigned bytes from 'src' and leaves them in
+ * 'data' as four floats, each scaled by 1/255.
+ */
+static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg src )
+{
+
+ /* Load and unpack twice:
+ *
+ * The second operand of each unpack is the identity register
+ * {0, 0, 0, 1.0f}.  Its X and Y lanes are 0.0f, i.e. all-zero bits, so
+ * interleaving with its low bytes presumably acts as a zero-extend
+ * (byte -> word, then word -> dword) -- confirm against the PUNPCKLBW
+ * definition.
+ */
+ sse_movss(p->func, data, src);
+ sse2_punpcklbw(p->func, data, get_identity(p));
+ sse2_punpcklbw(p->func, data, get_identity(p));
+
+ /* Convert to float:
+ */
+ sse2_cvtdq2ps(p->func, data, data);
+
+
+ /* Scale by 1/255.0
+ */
+ sse_mulps(p->func, data, get_inv_255(p));
+}
+
+
+
+
+static void emit_store_R32G32B32A32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ sse_movups(p->func, dest, dataXMM);
+}
+
+static void emit_store_R32G32B32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ /* Emit two, shuffle, emit one.
+ */
+ sse_movlps(p->func, dest, dataXMM);
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+ sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
+}
+
+static void emit_store_R32G32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(p->func, dest, dataXMM);
+}
+
+static void emit_store_R32( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ sse_movss(p->func, dest, dataXMM);
+}
+
+
+
+static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg dataXMM )
+{
+ /* Scale by 255.0
+ */
+ sse_mulps(p->func, dataXMM, get_255(p));
+
+ /* Pack and emit:
+ */
+ sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+ sse2_packssdw(p->func, dataXMM, dataXMM);
+ sse2_packuswb(p->func, dataXMM, dataXMM);
+ sse_movss(p->func, dest, dataXMM);
+}
+
+
+
+
+
+/*
+ * Emit code that computes the address of attribute 'a' for the current
+ * element:
+ *
+ *    srcEAX = attrib[a].input_stride * eltREG + attrib[a].input_ptr
+ *
+ * Both fields are read at run time through 'translateREG', which must
+ * hold the translate_sse pointer in the generated code.
+ */
+static void get_src_ptr( struct translate_sse *p,
+ struct x86_reg srcEAX,
+ struct x86_reg translateREG,
+ struct x86_reg eltREG,
+ unsigned a )
+{
+ /* Memory operands addressing the two fields relative to translateREG. */
+ struct x86_reg input_ptr =
+ x86_make_disp(translateREG,
+ get_offset(p, &p->attrib[a].input_ptr));
+
+ struct x86_reg input_stride =
+ x86_make_disp(translateREG,
+ get_offset(p, &p->attrib[a].input_stride));
+
+ /* Calculate pointer to current attrib:
+ */
+ x86_mov(p->func, srcEAX, input_stride);
+ x86_imul(p->func, srcEAX, eltREG);
+ x86_add(p->func, srcEAX, input_ptr);
+}
+
+
+/* Extended swizzles? Maybe later.
+ */
+static void emit_swizzle( struct translate_sse *p,
+ struct x86_reg dest,
+ struct x86_reg src,
+ unsigned shuffle )
+{
+ sse_shufps(p->func, dest, src, shuffle);
+}
+
+
+/*
+ * Emit code that converts one attribute: load it from 'srcECX' into XMM0
+ * according to a->input_format, then store XMM0 to 'dstEAX' according to
+ * a->output_format.
+ *
+ * Returns TRUE on success and FALSE if either format has no SSE path.
+ * Note that when only the output format is unsupported, the load code
+ * has already been emitted by the time FALSE is returned.
+ */
+static boolean translate_attr( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg srcECX,
+ struct x86_reg dstEAX)
+{
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+
+ /* Load: bring the attribute into dataXMM as four floats. */
+ switch (a->input_format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ emit_load_R32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ emit_load_R32G32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ emit_load_R32G32B32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ emit_load_R32G32B32A32(p, dataXMM, srcECX);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* Loaded as RGBA, then X and Z are swapped. */
+ emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
+ emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
+ break;
+ default:
+ return FALSE;
+ }
+
+ /* Store: write dataXMM out in the requested format. */
+ switch (a->output_format) {
+ case PIPE_FORMAT_R32_FLOAT:
+ emit_store_R32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ emit_store_R32G32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ emit_store_R32G32B32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ emit_store_R32G32B32A32(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* X and Z are swapped first, then stored as RGBA. */
+ emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+ emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Build run( struct translate *translate,
+ *            unsigned start,
+ *            unsigned count,
+ *            void *output_buffer )
+ * or
+ *  run_elts( struct translate *translate,
+ *            unsigned *elts,
+ *            unsigned count,
+ *            void *output_buffer )
+ *
+ * 'linear' selects the first form, otherwise the second one is built.
+ * The code is emitted into 'func', which is (re)initialized here.
+ *
+ * Returns FALSE as soon as translate_attr() rejects one of the key's
+ * elements (func is then left partially emitted), TRUE once the whole
+ * function has been emitted.
+ *
+ * Lots of hardcoding
+ *
+ * EAX -- pointer to current output vertex
+ * ECX -- pointer to current attribute
+ * EBX -- current element (linear) or pointer to current element (elts)
+ * EBP -- remaining vertex count
+ * ESI -- translate_sse pointer
+ *
+ */
+static boolean build_vertex_emit( struct translate_sse *p,
+                                  struct x86_function *func,
+                                  boolean linear )
+{
+   /* Local names match the registers they actually denote. */
+   struct x86_reg vertexEAX    = x86_make_reg(file_REG32, reg_AX);
+   struct x86_reg idxEBX       = x86_make_reg(file_REG32, reg_BX);
+   struct x86_reg srcECX       = x86_make_reg(file_REG32, reg_CX);
+   struct x86_reg countEBP     = x86_make_reg(file_REG32, reg_BP);
+   struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
+   uint8_t *fixup, *label;
+   unsigned j;
+
+   p->func = func;
+   p->loaded_inv_255 = FALSE;
+   p->loaded_255 = FALSE;
+   p->loaded_identity = FALSE;
+
+   x86_init_func(p->func);
+
+   /* Save the registers we use besides EAX/ECX; they are popped in
+    * reverse order before returning.
+    */
+   x86_push(p->func, countEBP);
+   x86_push(p->func, translateESI);
+   x86_push(p->func, idxEBX);
+
+   /* Get vertex count, compare to zero.  EBX is only used as a zero
+    * here; the forward jump skips the whole loop when count == 0.
+    */
+   x86_xor(p->func, idxEBX, idxEBX);
+   x86_mov(p->func, countEBP, x86_fn_arg(p->func, 3));
+   x86_cmp(p->func, countEBP, idxEBX);
+   fixup = x86_jcc_forward(p->func, cc_E);
+
+   /* If linear, idx is the current element, otherwise it is a pointer
+    * to the current element.
+    */
+   x86_mov(p->func, idxEBX, x86_fn_arg(p->func, 2));
+
+   /* Initialize destination register.
+    */
+   x86_mov(p->func, vertexEAX, x86_fn_arg(p->func, 4));
+
+   /* Move argument 1 (translate_sse pointer) into a reg:
+    */
+   x86_mov(p->func, translateESI, x86_fn_arg(p->func, 1));
+
+   /* Note address for loop jump */
+   label = x86_get_label(p->func);
+
+   /* Emit code for each of the attributes.  Currently routes
+    * everything through SSE registers, even when it might be more
+    * efficient to stick with regular old x86.  No optimization or
+    * other tricks - enough new ground to cover here just getting
+    * things working.
+    */
+   for (j = 0; j < p->translate.key.nr_elements; j++) {
+      const struct translate_element *a = &p->translate.key.element[j];
+
+      struct x86_reg destEAX = x86_make_disp(vertexEAX,
+                                             a->output_offset);
+
+      /* Figure out source pointer address:
+       */
+      if (linear) {
+         get_src_ptr(p, srcECX, translateESI, idxEBX, j);
+      }
+      else {
+         get_src_ptr(p, srcECX, translateESI, x86_deref(idxEBX), j);
+      }
+
+      if (!translate_attr( p, a, x86_deref(srcECX), destEAX ))
+         return FALSE;
+   }
+
+   /* Next vertex:
+    */
+   x86_lea(p->func, vertexEAX, x86_make_disp(vertexEAX, p->translate.key.output_stride));
+
+   /* Incr index: next element number (linear) or next 4-byte entry of
+    * the element array (elts).
+    */
+   if (linear) {
+      x86_inc(p->func, idxEBX);
+   }
+   else {
+      x86_lea(p->func, idxEBX, x86_make_disp(idxEBX, 4));
+   }
+
+   /* decr count, loop if not zero
+    */
+   x86_dec(p->func, countEBP);
+   x86_test(p->func, countEBP, countEBP);
+   x86_jcc(p->func, cc_NZ, label);
+
+   /* Exit mmx state?
+    */
+   if (p->func->need_emms)
+      mmx_emms(p->func);
+
+   /* Land forward jump here:
+    */
+   x86_fixup_fwd_jump(p->func, fixup);
+
+   /* Pop regs and return
+    */
+   x86_pop(p->func, idxEBX);
+   x86_pop(p->func, translateESI);
+   x86_pop(p->func, countEBP);
+   x86_ret(p->func);
+
+   return TRUE;
+}
+
+
+
+
+
+
+
+/**
+ * Record the base pointer and stride of vertex buffer 'buf'.
+ *
+ * Every element of the key that reads from 'buf' gets its input_ptr
+ * set to ptr plus the element's input_offset, and its input_stride set
+ * to 'stride'.  Elements sourcing other buffers are left alone.
+ */
+static void translate_sse_set_buffer( struct translate *translate,
+                                      unsigned buf,
+                                      const void *ptr,
+                                      unsigned stride )
+{
+   struct translate_sse *sse = (struct translate_sse *)translate;
+   unsigned n;
+
+   for (n = 0; n < sse->translate.key.nr_elements; n++) {
+      const struct translate_element *el = &sse->translate.key.element[n];
+
+      if (el->input_buffer != buf)
+         continue;
+
+      sse->attrib[n].input_ptr = ((char *)ptr + el->input_offset);
+      sse->attrib[n].input_stride = stride;
+   }
+}
+
+
+/**
+ * Destroy a translate_sse object: release both generated functions
+ * (linear first, then indexed) and free the object itself.
+ */
+static void translate_sse_release( struct translate *translate )
+{
+   struct translate_sse *sse = (struct translate_sse *)translate;
+
+   x86_release_func( &sse->linear_func );
+   x86_release_func( &sse->elt_func );
+
+   FREE(sse);
+}
+
+/**
+ * Indexed entry point: forwards all arguments unchanged to the
+ * generated function stored in gen_run_elts.
+ */
+static void translate_sse_run_elts( struct translate *translate,
+                                    const unsigned *elts,
+                                    unsigned count,
+                                    void *output_buffer )
+{
+   struct translate_sse *sse = (struct translate_sse *)translate;
+
+   sse->gen_run_elts( translate, elts, count, output_buffer );
+}
+
+/**
+ * Linear entry point: forwards all arguments unchanged to the
+ * generated function stored in gen_run.
+ */
+static void translate_sse_run( struct translate *translate,
+                               unsigned start,
+                               unsigned count,
+                               void *output_buffer )
+{
+   struct translate_sse *sse = (struct translate_sse *)translate;
+
+   sse->gen_run( translate, start, count, output_buffer );
+}
+
+
+/**
+ * Create an SSE2 code-generating translate object for 'key'.
+ *
+ * \return the new object, or NULL if the CPU lacks SSE/SSE2, the
+ *         allocation fails, either function cannot be built (e.g. an
+ *         element format translate_attr() does not handle), or the
+ *         generated code cannot be obtained.
+ */
+struct translate *translate_sse2_create( const struct translate_key *key )
+{
+   struct translate_sse *sse;
+
+   /* Nothing has been allocated yet, so these can return directly. */
+   if (!(rtasm_cpu_has_sse() && rtasm_cpu_has_sse2()))
+      return NULL;
+
+   sse = CALLOC_STRUCT( translate_sse );
+   if (!sse)
+      return NULL;
+
+   sse->translate.key = *key;
+   sse->translate.release = translate_sse_release;
+   sse->translate.set_buffer = translate_sse_set_buffer;
+   sse->translate.run_elts = translate_sse_run_elts;
+   sse->translate.run = translate_sse_run;
+
+   /* Generate the linear variant first, then the indexed one. */
+   if (!build_vertex_emit(sse, &sse->linear_func, TRUE) ||
+       !build_vertex_emit(sse, &sse->elt_func, FALSE))
+      goto fail;
+
+   sse->gen_run = (run_func)x86_get_func(&sse->linear_func);
+   if (!sse->gen_run)
+      goto fail;
+
+   sse->gen_run_elts = (run_elts_func)x86_get_func(&sse->elt_func);
+   if (!sse->gen_run_elts)
+      goto fail;
+
+   return &sse->translate;
+
+ fail:
+   /* Only reached after a successful allocation. */
+   translate_sse_release( &sse->translate );
+   return NULL;
+}
+
+
+
+#else
+
+/* Fallback for the #else branch, i.e. when the implementation above is
+ * not compiled: same name and signature as the real entry point, but
+ * it never produces a translate object.
+ *
+ * (Previously this was declared as "void translate_create_sse()" while
+ * returning NULL, which neither compiles cleanly nor matches the
+ * symbol defined in the other branch.)
+ */
+struct translate *translate_sse2_create( const struct translate_key *key )
+{
+   return NULL;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index f9366467cd5..25b132b40c7 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -59,10 +59,15 @@ void _debug_vprintf(const char *format, va_list ap)
#ifdef WIN32
#ifndef WINCE
/* EngDebugPrint does not handle float point arguments, so we need to use
- * our own vsnprintf implementation */
- char buf[512 + 1];
- util_vsnprintf(buf, sizeof(buf), format, ap);
- _EngDebugPrint("%s", buf);
+ * our own vsnprintf implementation. It is also very slow, so buffer until
+ * we find a newline. */
+ static char buf[512 + 1] = {'\0'};
+ size_t len = strlen(buf);
+ int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap);
+ if(ret > (int)(sizeof(buf) - len - 1) || strchr(buf + len, '\n')) {
+ _EngDebugPrint("%s", buf);
+ buf[0] = '\0';
+ }
#else
/* TODO: Implement debug print for WINCE */
#endif
@@ -195,6 +200,8 @@ debug_get_bool_option(const char *name, boolean dfault)
if(str == NULL)
result = dfault;
+ else if(!strcmp(str, "n"))
+ result = FALSE;
else if(!strcmp(str, "no"))
result = FALSE;
else if(!strcmp(str, "0"))
@@ -246,57 +253,16 @@ debug_get_flags_option(const char *name,
}
-#if defined(WIN32)
-ULONG_PTR debug_config_file = 0;
-void *mapped_config_file = 0;
-
-enum {
- eAssertAbortEn = 0x1,
-};
-
-/* Check for aborts enabled. */
-static unsigned abort_en(void)
-{
- if (!mapped_config_file)
- {
- /* Open an 8 byte file for configuration data. */
- mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file);
- }
-
- /* A value of "0" (ascii) in the configuration file will clear the
- * first 8 bits in the test byte.
- *
- * A value of "1" (ascii) in the configuration file will set the
- * first bit in the test byte.
- *
- * A value of "2" (ascii) in the configuration file will set the
- * second bit in the test byte.
- *
- * Currently the only interesting values are 0 and 1, which clear
- * and set abort-on-assert behaviour respectively.
- */
- return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn;
-}
-#else /* WIN32 */
-static unsigned abort_en(void)
-{
- return !GETENV("GALLIUM_ABORT_ON_ASSERT");
-}
-#endif
-
void _debug_assert_fail(const char *expr,
const char *file,
unsigned line,
const char *function)
{
_debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr);
- if (abort_en())
- {
+ if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE))
debug_break();
- } else
- {
+ else
_debug_printf("continuing...\n");
- }
}
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index eec5e600c91..1105066cb82 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -57,6 +57,7 @@ struct blit_state
struct pipe_depth_stencil_alpha_state depthstencil;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
+ struct pipe_viewport_state viewport;
struct pipe_shader_state vert_shader;
struct pipe_shader_state frag_shader;
@@ -100,7 +101,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
ctx->rasterizer.front_winding = PIPE_WINDING_CW;
ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
- ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */
+ ctx->rasterizer.bypass_clipping = 1;
/*ctx->rasterizer.bypass_vs = 1;*/
/* samplers */
@@ -113,8 +114,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.mag_img_filter = 0; /* set later */
ctx->sampler.normalized_coords = 1;
-#if 0
- /* viewport */
+ /* viewport (identity, we setup vertices in wincoords) */
ctx->viewport.scale[0] = 1.0;
ctx->viewport.scale[1] = 1.0;
ctx->viewport.scale[2] = 1.0;
@@ -123,7 +123,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->viewport.translate[1] = 0.0;
ctx->viewport.translate[2] = 0.0;
ctx->viewport.translate[3] = 0.0;
-#endif
/* vertex shader */
{
@@ -300,11 +299,15 @@ util_blit_pixels(struct blit_state *ctx,
cso_save_samplers(ctx->cso);
cso_save_sampler_textures(ctx->cso);
cso_save_framebuffer(ctx->cso);
+ cso_save_fragment_shader(ctx->cso);
+ cso_save_vertex_shader(ctx->cso);
+ cso_save_viewport(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+ cso_set_viewport(ctx->cso, &ctx->viewport);
/* sampler */
ctx->sampler.min_img_filter = filter;
@@ -313,11 +316,11 @@ util_blit_pixels(struct blit_state *ctx,
cso_single_sampler_done(ctx->cso);
/* texture */
- pipe->set_sampler_textures(pipe, 1, &tex);
+ cso_set_sampler_textures(ctx->cso, 1, &tex);
/* shaders */
- pipe->bind_fs_state(pipe, ctx->fs);
- pipe->bind_vs_state(pipe, ctx->vs);
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+ cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
@@ -344,6 +347,9 @@ util_blit_pixels(struct blit_state *ctx,
cso_restore_samplers(ctx->cso);
cso_restore_sampler_textures(ctx->cso);
cso_restore_framebuffer(ctx->cso);
+ cso_restore_fragment_shader(ctx->cso);
+ cso_restore_vertex_shader(ctx->cso);
+ cso_restore_viewport(ctx->cso);
/* free the texture */
pipe_surface_reference(&texSurf, NULL);
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 2fd214d22e2..dfdb5f16fe7 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -61,6 +61,7 @@ struct gen_mipmap_state
struct pipe_depth_stencil_alpha_state depthstencil;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
+ struct pipe_viewport_state viewport;
struct pipe_shader_state vert_shader;
struct pipe_shader_state frag_shader;
@@ -712,7 +713,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
ctx->rasterizer.front_winding = PIPE_WINDING_CW;
ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
- ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */
+ ctx->rasterizer.bypass_clipping = 1;
/*ctx->rasterizer.bypass_vs = 1;*/
/* sampler state */
@@ -723,9 +724,7 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
ctx->sampler.normalized_coords = 1;
-
-#if 0
- /* viewport */
+ /* viewport state (identity, verts are in wincoords) */
ctx->viewport.scale[0] = 1.0;
ctx->viewport.scale[1] = 1.0;
ctx->viewport.scale[2] = 1.0;
@@ -734,7 +733,6 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->viewport.translate[1] = 0.0;
ctx->viewport.translate[2] = 0.0;
ctx->viewport.translate[3] = 0.0;
-#endif
/* vertex shader */
{
@@ -825,26 +823,6 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
}
-#if 0
-static void
-simple_viewport(struct pipe_context *pipe, uint width, uint height)
-{
- struct pipe_viewport_state vp;
-
- vp.scale[0] = 0.5 * width;
- vp.scale[1] = -0.5 * height;
- vp.scale[2] = 1.0;
- vp.scale[3] = 1.0;
- vp.translate[0] = 0.5 * width;
- vp.translate[1] = 0.5 * height;
- vp.translate[2] = 0.0;
- vp.translate[3] = 0.0;
-
- pipe->set_viewport_state(pipe, &vp);
-}
-#endif
-
-
/**
* Generate mipmap images. It's assumed all needed texture memory is
* already allocated.
@@ -880,17 +858,18 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_save_samplers(ctx->cso);
cso_save_sampler_textures(ctx->cso);
cso_save_framebuffer(ctx->cso);
+ cso_save_fragment_shader(ctx->cso);
+ cso_save_vertex_shader(ctx->cso);
+ cso_save_viewport(ctx->cso);
/* bind our state */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+ cso_set_viewport(ctx->cso, &ctx->viewport);
- pipe->bind_vs_state(pipe, ctx->vs);
- pipe->bind_fs_state(pipe, ctx->fs);
-#if 0
- pipe->set_viewport_state(pipe, &ctx->viewport);
-#endif
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+ cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* init framebuffer state */
memset(&fb, 0, sizeof(fb));
@@ -926,11 +905,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
ctx->sampler.lod_bias = (float) srcLevel;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
cso_single_sampler_done(ctx->cso);
-#if 0
- simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]);
-#endif
- pipe->set_sampler_textures(pipe, 1, &pt);
+ cso_set_sampler_textures(ctx->cso, 1, &pt);
/* quad coords in window coords (bypassing clipping, viewport mapping) */
set_vertex_data(ctx,
@@ -954,4 +930,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_restore_samplers(ctx->cso);
cso_restore_sampler_textures(ctx->cso);
cso_restore_framebuffer(ctx->cso);
+ cso_restore_fragment_shader(ctx->cso);
+ cso_restore_vertex_shader(ctx->cso);
+ cso_restore_viewport(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index cd13823985e..1f6604c554e 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -40,6 +40,45 @@
/**
+ * Pack ubyte R,G,B,A into dest pixel.
+ *
+ * \param r,g,b,a  color channels
+ * \param format  destination pixel format; for formats other than the
+ *                four handled below only a debug message is printed
+ *                and dest is not written
+ * \param dest  receives one uint (8888 formats) or one ushort (565)
+ */
+static INLINE void
+util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
+                   enum pipe_format format, void *dest)
+{
+   /* Channels are widened to uint before shifting: a ubyte promotes to
+    * (signed) int, and shifting a value >= 0x80 left by 24 would move a
+    * bit into the sign position, which is undefined behaviour.
+    */
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      {
+         uint *d = (uint *) dest;
+         *d = ((uint) r << 24) | ((uint) g << 16) | ((uint) b << 8) | (uint) a;
+      }
+      return;
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      {
+         uint *d = (uint *) dest;
+         *d = ((uint) a << 24) | ((uint) r << 16) | ((uint) g << 8) | (uint) b;
+      }
+      return;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      {
+         uint *d = (uint *) dest;
+         *d = ((uint) b << 24) | ((uint) g << 16) | ((uint) r << 8) | (uint) a;
+      }
+      return;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      {
+         /* Keep the top 5/6/5 bits of r/g/b. */
+         ushort *d = (ushort *) dest;
+         *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
+      }
+      return;
+   /* XXX lots more cases to add */
+   default:
+      debug_printf("gallium: unhandled format in util_pack_color_ub()");
+   }
+}
+
+
+/**
* Note rgba outside [0,1] will be clamped for int pixel formats.
*/
static INLINE void
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
index e6c0b19ff66..01112ebe5aa 100644
--- a/src/gallium/auxiliary/util/u_time.c
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -120,20 +120,20 @@ util_time_compare(const struct util_time *t1,
}
-int
+boolean
util_time_timeout(const struct util_time *start,
const struct util_time *end,
const struct util_time *curr)
{
if(util_time_compare(start, end) <= 0)
- return util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0;
+ return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0);
else
- return util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0;
+ return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0);
}
#ifdef WIN32
-void util_time_usleep(unsigned usecs)
+void util_time_sleep(unsigned usecs)
{
LONGLONG start, curr, end;
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 32035cceb5e..c8836c137fa 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -77,9 +77,9 @@ util_time_diff(const struct util_time *t1,
const struct util_time *t2);
/**
- * Returns zero when the timeout expires, non zero otherwise.
+ * Returns non-zero when the timeout expires.
*/
-int
+boolean
util_time_timeout(const struct util_time *start,
const struct util_time *end,
const struct util_time *curr);
@@ -87,7 +87,7 @@ util_time_timeout(const struct util_time *start,
#ifndef WIN32
#define util_time_sleep usleep
#else
-int
+void
util_time_sleep(unsigned usecs);
#endif