summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Keith Whitwell <[email protected]> 2010-03-10 08:29:27 +0000
committer Keith Whitwell <[email protected]> 2010-03-10 08:29:27 +0000
commit 155fbcb0ed85c6452cbedd2317f201100fe698ab (patch)
tree 15e8a2662b1341bd07cebb9cd3624bc4ed42d020 /src
parent 9a8342ebcd83a2022b3c9b9539c5b15ac56d6440 (diff)
parent eeaa0861bfc98a06ceec269801271b7453c4fcbd (diff)
Merge commit 'origin/master' into gallium-sw-api-2
Diffstat (limited to 'src')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c21
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.h16
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c83
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h7
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c5
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c143
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.c8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c100
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c592
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h50
-rw-r--r--src/gallium/auxiliary/util/u_blit.c16
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c23
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h8
-rw-r--r--src/gallium/auxiliary/util/u_draw_quad.c12
-rw-r--r--src/gallium/auxiliary/util/u_dump_state.c1
-rw-r--r--src/gallium/auxiliary/util/u_format.h10
-rw-r--r--src/gallium/auxiliary/util/u_format_pack.py77
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c13
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.c31
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.h2
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c126
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h4
-rw-r--r--src/gallium/docs/source/context.rst2
-rw-r--r--src/gallium/docs/source/cso/velems.rst24
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h8
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c41
-rw-r--r--src/gallium/drivers/failover/fo_context.h3
-rw-r--r--src/gallium/drivers/failover/fo_state.c65
-rw-r--r--src/gallium/drivers/failover/fo_state_emit.c10
-rw-r--r--src/gallium/drivers/i915/i915_context.h8
-rw-r--r--src/gallium/drivers/i915/i915_state.c35
-rw-r--r--src/gallium/drivers/i965/brw_context.h5
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c208
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c247
-rw-r--r--src/gallium/drivers/i965/brw_structs.h4
-rw-r--r--src/gallium/drivers/identity/id_context.c54
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vertex.c35
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h100
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h8
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c34
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c8
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h9
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c34
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c8
-rw-r--r--src/gallium/drivers/nv50/Makefile3
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c43
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h95
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c40
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c326
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c34
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c39
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c436
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c35
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c1113
-rw-r--r--src/gallium/drivers/r300/r300_blit.c1
-rw-r--r--src/gallium/drivers/r300/r300_context.c2
-rw-r--r--src/gallium/drivers/r300/r300_context.h20
-rw-r--r--src/gallium/drivers/r300/r300_emit.c8
-rw-r--r--src/gallium/drivers/r300/r300_render.c33
-rw-r--r--src/gallium/drivers/r300/r300_state.c226
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c194
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h29
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h17
-rw-r--r--src/gallium/drivers/softpipe/sp_state_vertex.c35
-rw-r--r--src/gallium/drivers/svga/svga_context.h8
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c38
-rw-r--r--src/gallium/drivers/svga/svga_state_need_swtnl.c4
-rw-r--r--src/gallium/drivers/svga/svga_state_rss.c13
-rw-r--r--src/gallium/drivers/svga/svga_state_vdecl.c8
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c4
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c4
-rw-r--r--src/gallium/drivers/trace/tr_context.c91
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c1
-rw-r--r--src/gallium/include/pipe/p_compiler.h5
-rw-r--r--src/gallium/include/pipe/p_context.h9
-rw-r--r--src/gallium/include/pipe/p_state.h1
-rw-r--r--src/gallium/state_trackers/python/p_context.i8
-rw-r--r--src/gallium/state_trackers/vega/api_masks.c2
-rw-r--r--src/gallium/state_trackers/vega/polygon.c4
-rw-r--r--src/gallium/state_trackers/vega/renderer.c5
-rw-r--r--src/gallium/state_trackers/vega/vg_context.c8
-rw-r--r--src/gallium/state_trackers/vega/vg_context.h1
-rw-r--r--src/gallium/state_trackers/xorg/xorg_renderer.c13
-rw-r--r--src/gallium/state_trackers/xorg/xorg_renderer.h1
-rw-r--r--src/mesa/SConscript1
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c14
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c144
-rw-r--r--src/mesa/drivers/dri/r200/Makefile1
-rw-r--r--src/mesa/drivers/dri/r200/r200_context.c1
l---------src/mesa/drivers/dri/r200/radeon_pixel_read.c1
-rw-r--r--src/mesa/drivers/dri/r300/Makefile3
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c35
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c6
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c75
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c39
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.h3
l---------src/mesa/drivers/dri/r300/radeon_pixel_read.c1
-rw-r--r--src/mesa/drivers/dri/r600/Makefile3
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c1
l---------src/mesa/drivers/dri/r600/radeon_pixel_read.c1
-rw-r--r--src/mesa/drivers/dri/radeon/Makefile1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.h6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_context.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_fbo.c126
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_pixel_read.c188
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c16
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.h2
-rw-r--r--src/mesa/glapi/glapi.c44
-rw-r--r--src/mesa/glapi/glapi.h21
-rw-r--r--src/mesa/glapi/glapi_entrypoint.c331
-rw-r--r--src/mesa/glapi/glapi_getproc.c364
-rw-r--r--src/mesa/glapi/glapi_priv.h66
-rw-r--r--src/mesa/sources.mak1
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c4
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c5
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c4
-rw-r--r--src/mesa/state_tracker/st_context.c8
-rw-r--r--src/mesa/state_tracker/st_context.h3
-rw-r--r--src/mesa/state_tracker/st_draw.c7
-rw-r--r--src/mesa/state_tracker/st_draw_feedback.c1
134 files changed, 3994 insertions, 2847 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index a6a07e72c2f..900c64df4b9 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -43,6 +43,7 @@ struct cso_cache {
struct cso_hash *vs_hash;
struct cso_hash *rasterizer_hash;
struct cso_hash *sampler_hash;
+ struct cso_hash *velements_hash;
int max_size;
cso_sanitize_callback sanitize_cb;
@@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_
case CSO_VERTEX_SHADER:
hash = sc->vs_hash;
break;
+ case CSO_VELEMENTS:
+ hash = sc->velements_hash;
+ break;
}
return hash;
@@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data)
FREE(state);
}
+static void delete_velements(void *state, void *data)
+{
+ struct cso_velements *cso = (struct cso_velements *)state;
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+}
static INLINE void delete_cso(void *state, enum cso_cache_type type)
{
@@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
case CSO_VERTEX_SHADER:
delete_vs_state(state, 0);
break;
+ case CSO_VELEMENTS:
+ delete_velements(state, 0);
+ break;
default:
assert(0);
FREE(state);
@@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void)
sc->rasterizer_hash = cso_hash_create();
sc->fs_hash = cso_hash_create();
sc->vs_hash = cso_hash_create();
+ sc->velements_hash = cso_hash_create();
sc->sanitize_cb = sanitize_cb;
sc->sanitize_data = 0;
@@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
case CSO_VERTEX_SHADER:
hash = sc->vs_hash;
break;
+ case CSO_VELEMENTS:
+ hash = sc->velements_hash;
+ break;
}
iter = cso_hash_first_node(hash);
@@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc)
cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+ cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
cso_hash_delete(sc->blend_hash);
cso_hash_delete(sc->sampler_hash);
@@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc)
cso_hash_delete(sc->rasterizer_hash);
cso_hash_delete(sc->fs_hash);
cso_hash_delete(sc->vs_hash);
+ cso_hash_delete(sc->velements_hash);
FREE(sc);
}
@@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+ sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size);
}
int cso_maximum_cache_size(const struct cso_cache *sc)
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index eea60b940bb..fb09b83c623 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -53,6 +53,7 @@
* - rasterizer (old setup)
* - sampler
* - vertex shader
+ * - vertex elements
*
* Things that are not constant state objects include:
* - blend_color
@@ -90,7 +91,8 @@ enum cso_cache_type {
CSO_DEPTH_STENCIL_ALPHA,
CSO_RASTERIZER,
CSO_FRAGMENT_SHADER,
- CSO_VERTEX_SHADER
+ CSO_VERTEX_SHADER,
+ CSO_VELEMENTS
};
typedef void (*cso_state_callback)(void *ctx, void *obj);
@@ -144,6 +146,18 @@ struct cso_sampler {
struct pipe_context *context;
};
+struct cso_velems_state {
+ unsigned count;
+ struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+};
+
+struct cso_velements {
+ struct cso_velems_state state;
+ void *data;
+ cso_state_callback delete_state;
+ struct pipe_context *context;
+};
+
unsigned cso_construct_key(void *item, int item_size);
struct cso_cache *cso_cache_create(void);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index a7335c340ca..6500891a10c 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -89,6 +89,7 @@ struct cso_context {
void *rasterizer, *rasterizer_saved;
void *fragment_shader, *fragment_shader_saved, *geometry_shader;
void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
+ void *velements, *velements_saved;
struct pipe_clip_state clip;
struct pipe_clip_state clip_saved;
@@ -174,6 +175,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state)
return FALSE;
}
+static boolean delete_vertex_elements(struct cso_context *ctx,
+ void *state)
+{
+ struct cso_velements *cso = (struct cso_velements *)state;
+
+ if (ctx->velements == cso->data)
+ return FALSE;
+
+ if (cso->delete_state)
+ cso->delete_state(cso->context, cso->data);
+ FREE(state);
+ return TRUE;
+}
+
static INLINE boolean delete_cso(struct cso_context *ctx,
void *state, enum cso_cache_type type)
@@ -197,6 +212,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
case CSO_VERTEX_SHADER:
return delete_vs_state(ctx, state);
break;
+ case CSO_VELEMENTS:
+ return delete_vertex_elements(ctx, state);
+ break;
default:
assert(0);
FREE(state);
@@ -271,6 +289,7 @@ void cso_release_all( struct cso_context *ctx )
ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
ctx->pipe->bind_fs_state( ctx->pipe, NULL );
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+ ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
}
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
@@ -1130,7 +1149,6 @@ void cso_restore_geometry_shader(struct cso_context *ctx)
ctx->geometry_shader_saved = NULL;
}
-
/* clip state */
static INLINE void
@@ -1180,3 +1198,66 @@ cso_restore_clip(struct cso_context *ctx)
ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
}
}
+
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *states)
+{
+ unsigned key_size, hash_key;
+ struct cso_hash_iter iter;
+ void *handle;
+ struct cso_velems_state velems_state;
+
+ /* The count must be part of the stored state data as well.
+ Otherwise the first few pipe_vertex_elements could be identical even though
+ the counts differ, and there is no guarantee that the hashes would differ
+ in that case either. */
+ key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
+ velems_state.count = count;
+ memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count);
+ hash_key = cso_construct_key((void*)&velems_state, key_size);
+ iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size);
+
+ if (cso_hash_iter_is_null(iter)) {
+ struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memcpy(&cso->state, &velems_state, key_size);
+ cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]);
+ cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
+ cso->context = ctx->pipe;
+
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ handle = cso->data;
+ }
+ else {
+ handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
+ }
+
+ if (ctx->velements != handle) {
+ ctx->velements = handle;
+ ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
+ }
+ return PIPE_OK;
+}
+
+void cso_save_vertex_elements(struct cso_context *ctx)
+{
+ assert(!ctx->velements_saved);
+ ctx->velements_saved = ctx->velements;
+}
+
+void cso_restore_vertex_elements(struct cso_context *ctx)
+{
+ if (ctx->velements != ctx->velements_saved) {
+ ctx->velements = ctx->velements_saved;
+ ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
+ }
+ ctx->velements_saved = NULL;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 251a9a644f8..9c16abd28dd 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -122,6 +122,12 @@ void
cso_restore_vertex_sampler_textures(struct cso_context *cso);
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+ unsigned count,
+ const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+
/* These aren't really sensible -- most of the time the api provides
* object semantics for shaders anyway, and the cases where it doesn't
@@ -157,7 +163,6 @@ void cso_save_geometry_shader(struct cso_context *cso);
void cso_restore_geometry_shader(struct cso_context *cso);
-
enum pipe_error cso_set_framebuffer(struct cso_context *cso,
const struct pipe_framebuffer_state *fb);
void cso_save_framebuffer(struct cso_context *cso);
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 6d90a6c42fd..a8cdc57ad96 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -307,9 +307,8 @@ draw_arrays_instanced(struct draw_context *draw,
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- debug_printf(" format=%s comps=%u\n",
- util_format_name(draw->pt.vertex_element[i].src_format),
- draw->pt.vertex_element[i].nr_components);
+ debug_printf(" format=%s\n",
+ util_format_name(draw->pt.vertex_element[i].src_format));
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 32f9e5201c5..e2c67883972 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -644,13 +644,26 @@ lp_build_abs(struct lp_build_context *bld,
if(type.floating) {
/* Mask out the sign bit */
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- unsigned long long absMask = ~(1ULL << (type.width - 1));
- LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- a = LLVMBuildAnd(bld->builder, a, mask, "");
- a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
- return a;
+ if (type.length == 1) {
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ LLVMTypeRef float_type = LLVMFloatType();
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+ a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+ return a;
+ }
+ else {
+ /* vector of floats */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ unsigned long long absMask = ~(1ULL << (type.width - 1));
+ LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
+ }
}
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -753,7 +766,7 @@ lp_build_set_sign(struct lp_build_context *bld,
/**
- * Convert vector of int to vector of float.
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
*/
LLVMValueRef
lp_build_int_to_float(struct lp_build_context *bld,
@@ -764,7 +777,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
assert(type.floating);
/*assert(lp_check_value(type, a));*/
- {
+ if (type.length == 1) {
+ LLVMTypeRef float_type = LLVMFloatType();
+ return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+ }
+ else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
/*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
LLVMValueRef res;
@@ -921,12 +938,18 @@ lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
assert(type.floating);
- assert(lp_check_value(type, a));
- return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ if (type.length == 1) {
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ return LLVMBuildFPTrunc(bld->builder, a, int_type, "");
+ }
+ else {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ assert(lp_check_value(type, a));
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ }
}
@@ -939,6 +962,15 @@ lp_build_iround(struct lp_build_context *bld,
LLVMValueRef res;
assert(type.floating);
+
+ if (type.length == 1) {
+ /* scalar float to int */
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+ /* XXX we want rounding here! */
+ res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+ return res;
+ }
+
assert(lp_check_value(type, a));
if(util_cpu_caps.has_sse4_1) {
@@ -1207,6 +1239,7 @@ lp_build_polynomial(struct lp_build_context *bld,
unsigned num_coeffs)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef float_type = LLVMFloatType();
LLVMValueRef res = NULL;
unsigned i;
@@ -1216,7 +1249,13 @@ lp_build_polynomial(struct lp_build_context *bld,
__FUNCTION__);
for (i = num_coeffs; i--; ) {
- LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+ LLVMValueRef coeff;
+
+ if (type.length == 1)
+ coeff = LLVMConstReal(float_type, coeffs[i]);
+ else
+ coeff = lp_build_const_scalar(type, coeffs[i]);
+
if(res)
res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
else
@@ -1410,11 +1449,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
}
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp,
+ LLVMValueRef *p_floor_log2,
+ LLVMValueRef *p_log2)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef float_type = LLVMFloatType();
+ LLVMTypeRef int_type = LLVMIntType(type.width);
+
+ LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+ LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+ LLVMValueRef i = NULL;
+ LLVMValueRef exp = NULL;
+ LLVMValueRef mant = NULL;
+ LLVMValueRef logexp = NULL;
+ LLVMValueRef logmant = NULL;
+ LLVMValueRef res = NULL;
+
+ if(p_exp || p_floor_log2 || p_log2) {
+ /* TODO: optimize the constant case */
+ if(LLVMIsConstant(x))
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
+
+ assert(type.floating && type.width == 32);
+
+ i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+ /* exp = (float) exponent(x) */
+ exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+ }
+
+ if(p_floor_log2 || p_log2) {
+ LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+ LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+ logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+ logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+ logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+ }
+
+ if(p_log2) {
+ /* mant = (float) mantissa(x) */
+ mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+ mant = LLVMBuildOr(bld->builder, mant, one, "");
+ mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+ logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+ Elements(lp_build_log2_polynomial));
+
+ /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+ res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ }
+
+ if(p_exp)
+ *p_exp = exp;
+
+ if(p_floor_log2)
+ *p_floor_log2 = logexp;
+
+ if(p_log2)
+ *p_log2 = res;
+}
+
+
LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef x)
{
LLVMValueRef res;
- lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ if (bld->type.length == 1) {
+ lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+ }
+ else {
+ lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ }
return res;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index c8eaa8c3940..53447757e8e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -264,10 +264,16 @@ lp_build_one(struct lp_type type)
for(i = 1; i < type.length; ++i)
elems[i] = elems[0];
- return LLVMConstVector(elems, type.length);
+ if (type.length == 1)
+ return elems[0];
+ else
+ return LLVMConstVector(elems, type.length);
}
+/**
+ * Build constant-valued vector from a scalar value.
+ */
LLVMValueRef
lp_build_const_scalar(struct lp_type type,
double val)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 2726747eaea..7c585fda788 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -198,7 +198,7 @@ lp_build_compare(LLVMBuilderRef builder,
return res;
}
- }
+ } /* if (type.width * type.length == 128) */
#endif
if(type.floating) {
@@ -238,20 +238,25 @@ lp_build_compare(LLVMBuilderRef builder,
cond = LLVMBuildFCmp(builder, op, a, b, "");
res = LLVMBuildSelect(builder, cond, ones, zeros, "");
#else
- debug_printf("%s: warning: using slow element-wise vector comparison\n",
- __FUNCTION__);
res = LLVMGetUndef(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildFCmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ if (type.length == 1) {
+ res = LLVMBuildFCmp(builder, op, a, b, "");
+ }
+ else {
+ debug_printf("%s: warning: using slow element-wise float"
+ " vector comparison\n", __FUNCTION__);
+ for (i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ cond = LLVMBuildFCmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
}
#endif
}
@@ -286,20 +291,26 @@ lp_build_compare(LLVMBuilderRef builder,
cond = LLVMBuildICmp(builder, op, a, b, "");
res = LLVMBuildSelect(builder, cond, ones, zeros, "");
#else
- debug_printf("%s: warning: using slow element-wise int vector comparison\n",
- __FUNCTION__);
res = LLVMGetUndef(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildICmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ if (type.length == 1) {
+ res = LLVMBuildICmp(builder, op, a, b, "");
+ }
+ else {
+ debug_printf("%s: warning: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+
+ for(i = 0; i < type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ cond = LLVMBuildICmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
}
#endif
}
@@ -339,26 +350,31 @@ lp_build_select(struct lp_build_context *bld,
if(a == b)
return a;
- if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ if (type.length == 1) {
+ res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
+ else {
+ if(type.floating) {
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ }
- a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+ /* This often gets translated to PANDN, but sometimes the NOT is
+ * pre-computed and stored in another constant. The best strategy depends
+ * on available registers, so it is not a big deal -- hopefully LLVM does
+ * the right decision attending the rest of the program.
+ */
+ b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
- res = LLVMBuildOr(bld->builder, a, b, "");
+ res = LLVMBuildOr(bld->builder, a, b, "");
- if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ if(type.floating) {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
}
return res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1dca29cdd58..a965d394f44 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -65,6 +65,14 @@ struct lp_build_sample_context
const struct util_format_description *format_desc;
+ /** regular scalar float type */
+ struct lp_type float_type;
+ struct lp_build_context float_bld;
+
+ /** regular scalar int type */
+ struct lp_type int_type;
+ struct lp_build_context int_bld;
+
/** Incoming coordinates type and build context */
struct lp_type coord_type;
struct lp_build_context coord_bld;
@@ -108,6 +116,27 @@ wrap_mode_uses_border_color(unsigned mode)
}
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], data_ptr;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+ data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+ return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
/**
* Gen code to fetch a texel from a texture at int coords (x, y).
@@ -124,14 +153,13 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef y_stride,
- LLVMValueRef data_array,
+ LLVMValueRef data_ptr,
LLVMValueRef *texel)
{
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef offset;
LLVMValueRef packed;
LLVMValueRef use_border = NULL;
- LLVMValueRef data_ptr;
/* use_border = x < 0 || x >= width || y < 0 || y >= height */
if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
@@ -154,16 +182,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- /* XXX always use mipmap level 0 for now */
- {
- const int level = 0;
- LLVMValueRef indexes[2];
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
- data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
- }
-
/*
* Note: if we find an app which frequently samples the texture border
* we might want to implement a true conditional here to avoid sampling
@@ -233,17 +251,8 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
assert(bld->format_desc->block.height == 1);
assert(bld->format_desc->block.bits <= bld->texel_type.width);
- /* XXX always use mipmap level 0 for now */
- {
- const int level = 0;
- LLVMValueRef indexes[2];
- /* get data_ptr[level] */
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
- /* load texture base address */
- data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
- }
+ /* get pointer to mipmap level 0 data */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
return lp_build_gather(bld->builder,
bld->texel_type.length,
@@ -733,7 +742,210 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/**
- * Sample 2D texture with nearest filtering.
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+ LLVMValueRef base_size,
+ LLVMValueRef level)
+{
+ LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); /* base_size >> level, as an arithmetic shift */
+ size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); /* lower bound: int_coord_bld.one */
+ return size;
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex) /* dimension count (1, 2 or 3) for target 'tex' */
+{
+ switch (tex) {
+ case PIPE_TEXTURE_1D:
+ return 1;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_CUBE: /* cube targets share the 2D answer */
+ return 2;
+ case PIPE_TEXTURE_3D:
+ return 3;
+ default:
+ assert(0 && "bad texture target in texture_dims()");
+ return 2; /* reached only when the assert is compiled out */
+ }
+}
+
+
+/**
+ * Generate code for texture LOD (lambda) = clamp(log2(rho) + lod_bias, min_lod, max_lod).
+ * \param s vector of texcoord s values (elements 0, 1 and 2 are read)
+ * \param t vector of texcoord t values (only read when dims > 1)
+ * \param r vector of texcoord r values (only read when dims > 2)
+ * \param width scalar int texture width
+ * \param height scalar int texture height (only read when dims > 1)
+ * \param depth scalar int texture depth (only read when dims > 2)
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth)
+
+{
+ const int dims = texture_dims(bld->static_state->target);
+ struct lp_build_context *coord_bld = &bld->coord_bld; /* NOTE(review): not referenced below */
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
+ LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+ LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
+
+ LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+ LLVMValueRef s0, s1, s2;
+ LLVMValueRef t0, t1, t2;
+ LLVMValueRef r0, r1, r2;
+ LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+ LLVMValueRef rho, lod;
+
+ /*
+ * dsdx = abs(s[1] - s[0]);
+ * dsdy = abs(s[2] - s[0]);
+ * dtdx = abs(t[1] - t[0]);
+ * dtdy = abs(t[2] - t[0]);
+ * drdx = abs(r[1] - r[0]);
+ * drdy = abs(r[2] - r[0]);
+ * XXX we're assuming a four-element quad in 2x2 layout here.
+ */
+ s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+ s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+ s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+ dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+ t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+ t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+ dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+ r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+ r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+ drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
+
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
+ rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
+ }
+
+ /* compute lod = log2(rho) */
+ lod = lp_build_log2(float_bld, rho);
+
+ /* add lod bias */
+ lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+
+ /* clamp lod to [min_lod, max_lod] taken from the static sampler state */
+ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+ return lod;
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index. Note: this is all scalar code.
+ * \param unit texture unit passed to dynamic_state->last_level()
+ * \param lod scalar float texture level of detail
+ * \param level_out returns integer level, clamped to [0, last_level]
+ */
+static void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer via lp_build_iround() */
+ level = lp_build_iround(float_bld, lod);
+
+ /* clamp level to legal range of levels */
+ *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to two adjacent mipmap
+ * level indexes, each limited to [0, last_level], plus the fractional LOD
+ * (weight_out). Later, we'll sample both levels and interpolate between them.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out,
+ LLVMValueRef *weight_out)
+{
+ struct lp_build_context *coord_bld = &bld->coord_bld; /* NOTE(review): lp_build_nearest_mip_level uses float_bld/int_bld here - confirm */
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef last_level, level;
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lp_build_ifloor(coord_bld, lod);
+
+ /* compute level 0 and clamp to legal range of levels */
+ *level0_out = lp_build_clamp(int_coord_bld, level,
+ int_coord_bld->zero,
+ last_level);
+ /* level 1 = level 0 + 1; level 0 is already >= 0, so only the upper bound is needed */
+ *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
+ *level1_out = lp_build_min(int_coord_bld, *level1_out, last_level);
+
+ *weight_out = lp_build_fract(coord_bld, lod);
+}
+
+
+
+/**
+ * Sample 2D texture with nearest filtering, no mipmapping.
*/
static void
lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
@@ -746,6 +958,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
LLVMValueRef *texel)
{
LLVMValueRef x, y;
+ LLVMValueRef data_ptr;
x = lp_build_sample_wrap_nearest(bld, s, width,
bld->static_state->pot_width,
@@ -757,7 +970,63 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
lp_build_name(x, "tex.x.wrapped");
lp_build_name(y, "tex.y.wrapped");
- lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_array, texel);
+ /* get pointer to mipmap level 0 data */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+ lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
+}
+
+
+/**
+ * Sample 2D texture with nearest filtering, nearest mipmap.
+ */
+static void
+lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef stride,
+ LLVMValueRef data_array,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef x, y;
+ LLVMValueRef lod, ilevel, ilevel_vec;
+ LLVMValueRef data_ptr;
+
+ /* compute float LOD; r and depth are passed as NULL */
+ lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
+
+ /* convert LOD to int */
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
+
+ ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);
+
+ /* minify width_vec, height_vec and stride by mipmap level 'ilevel' */
+ width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
+ height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
+ stride = lp_build_minify(bld, stride, ilevel_vec);
+
+ x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s);
+ y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t);
+
+ lp_build_name(x, "tex.x.wrapped");
+ lp_build_name(y, "tex.y.wrapped");
+
+ /* NOTE(review): the [ilevel] lookup is disabled by if (0); level 0 data is always used */
+ if (0)
+ data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
+ else
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+ lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
}
@@ -779,6 +1048,7 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
LLVMValueRef x0, x1;
LLVMValueRef y0, y1;
LLVMValueRef neighbors[2][2][4];
+ LLVMValueRef data_ptr;
unsigned chan;
lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
@@ -786,10 +1056,13 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
bld->static_state->wrap_t, &y0, &y1, &t_fpart);
- lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_array, neighbors[0][0]);
- lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_array, neighbors[0][1]);
- lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_array, neighbors[1][0]);
- lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_array, neighbors[1][1]);
+ /* get pointer to mipmap level 0 data */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+ lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
/* TODO: Don't interpolate missing channels */
for(chan = 0; chan < 4; ++chan) {
@@ -857,7 +1130,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
- lp_build_context_init(&i32, builder, lp_type_int(32));
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
lp_build_context_init(&h16, builder, lp_type_ufixed(16));
lp_build_context_init(&u8n, builder, lp_type_unorm(8));
@@ -1066,194 +1339,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
}
-static int
-texture_dims(enum pipe_texture_target tex)
-{
- switch (tex) {
- case PIPE_TEXTURE_1D:
- return 1;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- return 2;
- case PIPE_TEXTURE_3D:
- return 3;
- default:
- assert(0 && "bad texture target in texture_dims()");
- return 2;
- }
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param s vector of texcoord s values
- * \param t vector of texcoord t values
- * \param r vector of texcoord r values
- * \param width scalar int texture width
- * \param height scalar int texture height
- * \param depth scalar int texture depth
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth)
-
-{
- const int dims = texture_dims(bld->static_state->target);
- struct lp_build_context *coord_bld = &bld->coord_bld;
-
- LLVMValueRef lod_bias = lp_build_const_scalar(bld->coord_bld.type,
- bld->static_state->lod_bias);
- LLVMValueRef min_lod = lp_build_const_scalar(bld->coord_bld.type,
- bld->static_state->min_lod);
- LLVMValueRef max_lod = lp_build_const_scalar(bld->coord_bld.type,
- bld->static_state->max_lod);
-
- LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
- LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
- LLVMValueRef s0, s1, s2;
- LLVMValueRef t0, t1, t2;
- LLVMValueRef r0, r1, r2;
- LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
- LLVMValueRef rho, lod;
-
- /*
- * dsdx = abs(s[1] - s[0]);
- * dsdy = abs(s[2] - s[0]);
- * dtdx = abs(t[1] - t[0]);
- * dtdy = abs(t[2] - t[0]);
- * drdx = abs(r[1] - r[0]);
- * drdy = abs(r[2] - r[0]);
- * XXX we're assuming a four-element quad in 2x2 layout here.
- */
- s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
- s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
- s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
- dsdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s1, s0));
- dsdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s2, s0));
- if (dims > 1) {
- t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
- t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
- t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
- dtdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t1, t0));
- dtdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t2, t0));
- if (dims > 2) {
- r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
- r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
- r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
- drdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r1, r0));
- drdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r2, r0));
- }
- }
-
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this can be vectorized somewhat
- */
- rho = lp_build_mul(coord_bld,
- lp_build_max(coord_bld, dsdx, dsdy),
- lp_build_int_to_float(coord_bld, width));
- if (dims > 1) {
- LLVMValueRef max;
- max = lp_build_mul(coord_bld,
- lp_build_max(coord_bld, dtdx, dtdy),
- lp_build_int_to_float(coord_bld, height));
- rho = lp_build_max(coord_bld, rho, max);
- if (dims > 2) {
- max = lp_build_mul(coord_bld,
- lp_build_max(coord_bld, drdx, drdy),
- lp_build_int_to_float(coord_bld, depth));
- rho = lp_build_max(coord_bld, rho, max);
- }
- }
-
- /* compute lod = log2(rho) */
- lod = lp_build_log2(coord_bld, rho);
-
- /* add lod bias */
- lod = lp_build_add(coord_bld, lod, lod_bias);
-
- /* clamp lod */
- lod = lp_build_clamp(coord_bld, lod, min_lod, max_lod);
-
- return lod;
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * \param lod scalar float texture level of detail
- * \param level_out returns integer
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level_out)
-{
- struct lp_build_context *coord_bld = &bld->coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef last_level, level;
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_iround(coord_bld, lod);
-
- /* clamp level to legal range of levels */
- *level_out = lp_build_clamp(int_coord_bld, level,
- int_coord_bld->zero,
- last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
- * two (adjacent) mipmap level indexes. Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
-{
- struct lp_build_context *coord_bld = &bld->coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef last_level, level;
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_ifloor(coord_bld, lod);
-
- /* compute level 0 and clamp to legal range of levels */
- *level0_out = lp_build_clamp(int_coord_bld, level,
- int_coord_bld->zero,
- last_level);
- /* compute level 1 and clamp to legal range of levels */
- *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
- *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
-
- *weight_out = lp_build_fract(coord_bld, lod);
-}
-
-
-
/**
* Build texture sampling code.
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
+ * \param type vector float type to use for coords, etc.
*/
void
lp_build_sample_soa(LLVMBuilderRef builder,
@@ -1267,17 +1357,19 @@ lp_build_sample_soa(LLVMBuilderRef builder,
LLVMValueRef *texel)
{
struct lp_build_sample_context bld;
- LLVMValueRef width;
- LLVMValueRef height;
- LLVMValueRef stride;
+ LLVMValueRef width, width_vec;
+ LLVMValueRef height, height_vec;
+ LLVMValueRef stride, stride_vec;
LLVMValueRef data_array;
LLVMValueRef s;
LLVMValueRef t;
LLVMValueRef r;
+ boolean done = FALSE;
(void) lp_build_lod_selector; /* temporary to silence warning */
(void) lp_build_nearest_mip_level;
(void) lp_build_linear_mip_levels;
+ (void) lp_build_minify;
/* Setup our build context */
memset(&bld, 0, sizeof bld);
@@ -1285,10 +1377,16 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.static_state = static_state;
bld.dynamic_state = dynamic_state;
bld.format_desc = util_format_description(static_state->format);
+
+ bld.float_type = lp_type_float(32);
+ bld.int_type = lp_type_int(32);
bld.coord_type = type;
bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
bld.texel_type = type;
+
+ lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+ lp_build_context_init(&bld.int_bld, builder, bld.int_type);
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
@@ -1305,30 +1403,56 @@ lp_build_sample_soa(LLVMBuilderRef builder,
t = coords[1];
r = coords[2];
- width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
- height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
- stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
+ width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+ height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+ stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
if(static_state->target == PIPE_TEXTURE_1D)
t = bld.coord_bld.zero;
- switch (static_state->min_img_filter) {
- case PIPE_TEX_FILTER_NEAREST:
- lp_build_sample_2d_nearest_soa(&bld, s, t, width, height,
- stride, data_array, texel);
+ switch (static_state->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
break;
- case PIPE_TEX_FILTER_LINEAR:
- if(lp_format_is_rgba8(bld.format_desc) &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t))
- lp_build_sample_2d_linear_aos(&bld, s, t, width, height,
- stride, data_array, texel);
- else
- lp_build_sample_2d_linear_soa(&bld, s, t, width, height,
- stride, data_array, texel);
+ case PIPE_TEX_MIPFILTER_NEAREST:
+
+ switch (static_state->min_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
+ s, t,
+ width, height,
+ width_vec, height_vec,
+ stride_vec,
+ data_array, texel);
+ done = TRUE;
+ break;
+ }
+
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
break;
default:
- assert(0);
+ assert(0 && "invalid mip filter");
+ }
+
+ if (!done) {
+ switch (static_state->min_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
+ stride_vec, data_array, texel);
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ if(lp_format_is_rgba8(bld.format_desc) &&
+ is_simple_wrap_mode(static_state->wrap_s) &&
+ is_simple_wrap_mode(static_state->wrap_t))
+ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+ stride_vec, data_array, texel);
+ else
+ lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
+ stride_vec, data_array, texel);
+ break;
+ default:
+ assert(0);
+ }
}
/* FIXME: respect static_state->min_mip_filter */;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 16946cc28a2..4daa904e633 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -103,7 +103,7 @@ struct lp_type {
unsigned width:14;
/**
- * Vector length.
+ * Vector length. If length==1, this is a scalar (float/int) type.
*
* width*length should be a power of two greater or equal to eight.
*
@@ -139,6 +139,7 @@ struct lp_build_context
};
+/** Create scalar float type */
static INLINE struct lp_type
lp_type_float(unsigned width)
{
@@ -148,12 +149,29 @@ lp_type_float(unsigned width)
res_type.floating = TRUE;
res_type.sign = TRUE;
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.floating = TRUE;
+ res_type.sign = TRUE;
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
}
+/** Create scalar int type */
static INLINE struct lp_type
lp_type_int(unsigned width)
{
@@ -162,12 +180,28 @@ lp_type_int(unsigned width)
memset(&res_type, 0, sizeof res_type);
res_type.sign = TRUE;
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
}
+/** Create scalar uint type */
static INLINE struct lp_type
lp_type_uint(unsigned width)
{
@@ -175,6 +209,20 @@ lp_type_uint(unsigned width)
memset(&res_type, 0, sizeof res_type);
res_type.width = width;
+ res_type.length = 1;
+
+ return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = width;
res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
return res_type;
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 0b263a9db5c..4d0737ccd3d 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -63,6 +63,7 @@ struct blit_state
struct pipe_sampler_state sampler;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
+ struct pipe_vertex_element velem[2];
void *vs;
void *fs[TGSI_WRITEMASK_XYZW + 1];
@@ -114,6 +115,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.mag_img_filter = 0; /* set later */
ctx->sampler.normalized_coords = 1;
+ /* vertex elements state */
+ memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+ for (i = 0; i < 2; i++) {
+ ctx->velem[i].src_offset = i * 4 * sizeof(float);
+ ctx->velem[i].instance_divisor = 0;
+ ctx->velem[i].vertex_buffer_index = 0;
+ ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+
/* vertex shader - still required to provide the linkage between
* fragment shader input semantics and vertex_element/buffers.
*/
@@ -410,12 +420,14 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_save_fragment_shader(ctx->cso);
cso_save_vertex_shader(ctx->cso);
cso_save_clip(ctx->cso);
+ cso_save_vertex_elements(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
+ cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
ctx->sampler.min_img_filter = filter;
@@ -480,6 +492,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
cso_restore_fragment_shader(ctx->cso);
cso_restore_vertex_shader(ctx->cso);
cso_restore_clip(ctx->cso);
+ cso_restore_vertex_elements(ctx->cso);
pipe_texture_reference(&tex, NULL);
}
@@ -564,12 +577,14 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_save_fragment_shader(ctx->cso);
cso_save_vertex_shader(ctx->cso);
cso_save_clip(ctx->cso);
+ cso_save_vertex_elements(ctx->cso);
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
+ cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
ctx->sampler.min_img_filter = filter;
@@ -628,4 +643,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_restore_fragment_shader(ctx->cso);
cso_restore_vertex_shader(ctx->cso);
cso_restore_clip(ctx->cso);
+ cso_restore_vertex_elements(ctx->cso);
}
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 0ba09d33bfc..33d09085f0b 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -88,6 +88,8 @@ struct blitter_context_priv
void *dsa_write_depth_keep_stencil;
void *dsa_keep_depth_stencil;
+ void *velem_state;
+
/* Sampler state for clamping to a miplevel. */
void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
@@ -108,6 +110,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
struct pipe_rasterizer_state rs_state = { 0 };
struct pipe_sampler_state *sampler_state;
+ struct pipe_vertex_element velem[2];
unsigned i;
ctx = CALLOC_STRUCT(blitter_context_priv);
@@ -122,6 +125,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->blitter.saved_rs_state = INVALID_PTR;
ctx->blitter.saved_fs = INVALID_PTR;
ctx->blitter.saved_vs = INVALID_PTR;
+ ctx->blitter.saved_velem_state = INVALID_PTR;
ctx->blitter.saved_fb_state.nr_cbufs = ~0;
ctx->blitter.saved_num_textures = ~0;
ctx->blitter.saved_num_sampler_states = ~0;
@@ -170,6 +174,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
rs_state.flatshade = 1;
ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
+ /* vertex elements state */
+ memset(&velem[0], 0, sizeof(velem[0]) * 2);
+ for (i = 0; i < 2; i++) {
+ velem[i].src_offset = i * 4 * sizeof(float);
+ velem[i].instance_divisor = 0;
+ velem[i].vertex_buffer_index = 0;
+ velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+ ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
+
/* fragment shaders are created on-demand */
/* vertex shaders */
@@ -219,6 +233,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs_col);
pipe->delete_vs_state(pipe, ctx->vs_tex);
+ pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
if (ctx->fs_texfetch_col[i])
@@ -246,7 +261,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
ctx->blitter.saved_dsa_state != INVALID_PTR &&
ctx->blitter.saved_rs_state != INVALID_PTR &&
ctx->blitter.saved_fs != INVALID_PTR &&
- ctx->blitter.saved_vs != INVALID_PTR);
+ ctx->blitter.saved_vs != INVALID_PTR &&
+ ctx->blitter.saved_velem_state != INVALID_PTR);
}
static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -259,12 +275,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+ pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
ctx->blitter.saved_blend_state = INVALID_PTR;
ctx->blitter.saved_dsa_state = INVALID_PTR;
ctx->blitter.saved_rs_state = INVALID_PTR;
ctx->blitter.saved_fs = INVALID_PTR;
ctx->blitter.saved_vs = INVALID_PTR;
+ ctx->blitter.saved_velem_state = INVALID_PTR;
pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
@@ -569,6 +587,7 @@ void util_blitter_clear(struct blitter_context *blitter,
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
pipe->bind_vs_state(pipe, ctx->vs_col);
@@ -634,6 +653,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
pipe->bind_vs_state(pipe, ctx->vs_tex);
pipe->bind_fragment_sampler_states(pipe, 1,
blitter_get_sampler_state(ctx, src->level));
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
pipe->set_framebuffer_state(pipe, &fb_state);
@@ -807,6 +827,7 @@ void util_blitter_fill(struct blitter_context *blitter,
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
pipe->bind_vs_state(pipe, ctx->vs_col);
+ pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
fb_state.width = dst->width;
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 92008fce992..ecafdabafae 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -43,6 +43,7 @@ struct blitter_context
/* Private members, really. */
void *saved_blend_state; /**< blend state */
void *saved_dsa_state; /**< depth stencil alpha state */
+ void *saved_velem_state; /**< vertex elements state */
void *saved_rs_state; /**< rasterizer state */
void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
@@ -173,6 +174,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
}
static INLINE
+void util_blitter_save_vertex_elements(struct blitter_context *blitter,
+ void *state)
+{
+ blitter->saved_velem_state = state;
+}
+
+static INLINE
void util_blitter_save_stencil_ref(struct blitter_context *blitter,
const struct pipe_stencil_ref *state)
{
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 14506e84519..8c194102bfc 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -45,8 +45,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
uint num_attribs)
{
struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
- uint i;
assert(num_attribs <= PIPE_MAX_ATTRIBS);
@@ -58,15 +56,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
vbuffer.max_index = num_verts - 1;
pipe->set_vertex_buffers(pipe, 1, &vbuffer);
- /* tell pipe about the vertex attributes */
- for (i = 0; i < num_attribs; i++) {
- velements[i].src_offset = i * 4 * sizeof(float);
- velements[i].instance_divisor = 0;
- velements[i].vertex_buffer_index = 0;
- velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- velements[i].nr_components = 4;
- }
- pipe->set_vertex_elements(pipe, num_attribs, velements);
+ /* note: vertex elements already set by caller */
/* draw */
pipe->draw_arrays(pipe, prim_type, 0, num_verts);
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index ae7afd7311e..52cf3ef4ce0 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -700,7 +700,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem
util_dump_member(stream, uint, state, src_offset);
util_dump_member(stream, uint, state, vertex_buffer_index);
- util_dump_member(stream, uint, state, nr_components);
util_dump_member(stream, format, state, src_format);
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index b2aa5bfb188..c08fdcafcc8 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -415,6 +415,16 @@ util_format_has_alpha(enum pipe_format format)
}
}
+/**
+ * Return the number of components stored.
+ * Formats with block size != 1x1 will always have 1 component (the block).
+ */
+static INLINE unsigned
+util_format_get_nr_components(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ return desc->nr_channels;
+}
/*
* Format access functions.
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index b49039db39b..409d024c637 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -418,31 +418,70 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix):
dst_native_type = native_type(format)
+ assert format.layout == PLAIN
+
+ inv_swizzle = format.inv_swizzles()
+
print 'static INLINE void'
print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
print '{'
- print ' union util_format_%s pixel;' % format.short_name()
-
- assert format.layout == PLAIN
+
+ if format.is_bitmask():
+ depth = format.block_size()
+ print ' uint%u_t value = 0;' % depth
- inv_swizzle = format.inv_swizzles()
+ shift = 0
+ for i in range(4):
+ dst_channel = format.channels[i]
+ if inv_swizzle[i] is not None:
+ value = 'rgba'[inv_swizzle[i]]
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+ if format.colorspace == ZS:
+ if i == 3:
+ value = get_one(dst_channel)
+ elif i >= 1:
+ value = '0'
+ if dst_channel.type in (UNSIGNED, SIGNED):
+ if shift + dst_channel.size < depth:
+ value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1)
+ if shift:
+ value = '(%s) << %u' % (value, shift)
+ if dst_channel.type == SIGNED:
+ # Cast to unsigned
+ value = '(uint%u_t)(%s) ' % (depth, value)
+ else:
+ value = None
+ if value is not None:
+ print ' value |= %s;' % (value)
+
+ shift += dst_channel.size
- for i in range(4):
- dst_channel = format.channels[i]
- width = dst_channel.size
- if inv_swizzle[i] is None:
- continue
- value = 'rgba'[inv_swizzle[i]]
- value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
- if format.colorspace == ZS:
- if i == 3:
- value = get_one(dst_channel)
- elif i >= 1:
- value = '0'
- print ' pixel.chan.%s = %s;' % (dst_channel.name, value)
+ print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+ print ' value = util_bswap%u(value);' % depth
+ print '#endif'
+
+ print ' *(uint%u_t *)dst = value;' % depth
- bswap_format(format)
- print ' memcpy(dst, &pixel, sizeof pixel);'
+ else:
+ print ' union util_format_%s pixel;' % format.short_name()
+
+ for i in range(4):
+ dst_channel = format.channels[i]
+ width = dst_channel.size
+ if inv_swizzle[i] is None:
+ continue
+ value = 'rgba'[inv_swizzle[i]]
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+ if format.colorspace == ZS:
+ if i == 3:
+ value = get_one(dst_channel)
+ elif i >= 1:
+ value = '0'
+ print ' pixel.chan.%s = %s;' % (dst_channel.name, value)
+
+ bswap_format(format)
+ print ' memcpy(dst, &pixel, sizeof pixel);'
+
print '}'
print
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index fc027e48e4e..d421bee8efe 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -62,6 +62,7 @@ struct gen_mipmap_state
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
struct pipe_clip_state clip;
+ struct pipe_vertex_element velem[2];
void *vs;
void *fs2d, *fsCube;
@@ -1307,6 +1308,15 @@ util_create_gen_mipmap(struct pipe_context *pipe,
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
ctx->sampler.normalized_coords = 1;
+ /* vertex elements state */
+ memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+ for (i = 0; i < 2; i++) {
+ ctx->velem[i].src_offset = i * 4 * sizeof(float);
+ ctx->velem[i].instance_divisor = 0;
+ ctx->velem[i].vertex_buffer_index = 0;
+ ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+
/* vertex shader - still needed to specify mapping from fragment
* shader input semantics to vertex elements
*/
@@ -1501,12 +1511,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_save_vertex_shader(ctx->cso);
cso_save_viewport(ctx->cso);
cso_save_clip(ctx->cso);
+ cso_save_vertex_elements(ctx->cso);
/* bind our state */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
+ cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
cso_set_fragment_shader_handle(ctx->cso, fs);
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
@@ -1593,4 +1605,5 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
cso_restore_vertex_shader(ctx->cso);
cso_restore_viewport(ctx->cso);
cso_restore_clip(ctx->cso);
+ cso_restore_vertex_elements(ctx->cso);
}
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index ba23435f698..6d461cb8800 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -230,6 +230,7 @@ static bool
init_pipe_state(struct vl_compositor *c)
{
struct pipe_sampler_state sampler;
+ struct pipe_vertex_element vertex_elems[2];
assert(c);
@@ -251,15 +252,27 @@ init_pipe_state(struct vl_compositor *c)
/*sampler.border_color[i] = ;*/
/*sampler.max_anisotropy = ;*/
c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-
+
+ vertex_elems[0].src_offset = 0;
+ vertex_elems[0].instance_divisor = 0;
+ vertex_elems[0].vertex_buffer_index = 0;
+ vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ vertex_elems[1].src_offset = 0;
+ vertex_elems[1].instance_divisor = 0;
+ vertex_elems[1].vertex_buffer_index = 1;
+ vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+ c->vertex_elems = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+
+
return true;
}
static void cleanup_pipe_state(struct vl_compositor *c)
{
assert(c);
-
+
c->pipe->delete_sampler_state(c->pipe, c->sampler);
+ c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems);
}
static bool
@@ -314,12 +327,6 @@ init_buffers(struct vl_compositor *c)
pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
- c->vertex_elems[0].src_offset = 0;
- c->vertex_elems[0].instance_divisor = 0;
- c->vertex_elems[0].vertex_buffer_index = 0;
- c->vertex_elems[0].nr_components = 2;
- c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
/*
* Create our texcoord buffer and texcoord buffer element
* Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
@@ -344,12 +351,6 @@ init_buffers(struct vl_compositor *c)
pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
- c->vertex_elems[1].src_offset = 0;
- c->vertex_elems[1].instance_divisor = 0;
- c->vertex_elems[1].vertex_buffer_index = 1;
- c->vertex_elems[1].nr_components = 2;
- c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
/*
* Create our vertex shader's constant buffer
* Const buffer contains scaling and translation vectors
@@ -483,7 +484,7 @@ void vl_compositor_render(struct vl_compositor *compositor,
compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
- compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+ compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems);
compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 6a9a3fd7af1..51755554da1 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -43,10 +43,10 @@ struct vl_compositor
void *sampler;
void *vertex_shader;
void *fragment_shader;
+ void *vertex_elems;
struct pipe_viewport_state viewport;
struct pipe_scissor_state scissor;
struct pipe_vertex_buffer vertex_bufs[2];
- struct pipe_vertex_element vertex_elems[2];
struct pipe_buffer *vs_const_buf, *fs_const_buf;
};
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index f323de0ea55..0763b5bb0e4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -708,6 +708,7 @@ static bool
init_pipe_state(struct vl_mpeg12_mc_renderer *r)
{
struct pipe_sampler_state sampler;
+ struct pipe_vertex_element vertex_elems[8];
unsigned filters[5];
unsigned i;
@@ -771,6 +772,59 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
}
+ /* Position element */
+ vertex_elems[0].src_offset = 0;
+ vertex_elems[0].instance_divisor = 0;
+ vertex_elems[0].vertex_buffer_index = 0;
+ vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Luma, texcoord element */
+ vertex_elems[1].src_offset = sizeof(struct vertex2f);
+ vertex_elems[1].instance_divisor = 0;
+ vertex_elems[1].vertex_buffer_index = 0;
+ vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Chroma Cr texcoord element */
+ vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+ vertex_elems[2].instance_divisor = 0;
+ vertex_elems[2].vertex_buffer_index = 0;
+ vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Chroma Cb texcoord element */
+ vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+ vertex_elems[3].instance_divisor = 0;
+ vertex_elems[3].vertex_buffer_index = 0;
+ vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* First ref surface top field texcoord element */
+ vertex_elems[4].src_offset = 0;
+ vertex_elems[4].instance_divisor = 0;
+ vertex_elems[4].vertex_buffer_index = 1;
+ vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* First ref surface bottom field texcoord element */
+ vertex_elems[5].src_offset = sizeof(struct vertex2f);
+ vertex_elems[5].instance_divisor = 0;
+ vertex_elems[5].vertex_buffer_index = 1;
+ vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Second ref surface top field texcoord element */
+ vertex_elems[6].src_offset = 0;
+ vertex_elems[6].instance_divisor = 0;
+ vertex_elems[6].vertex_buffer_index = 2;
+ vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* Second ref surface bottom field texcoord element */
+ vertex_elems[7].src_offset = sizeof(struct vertex2f);
+ vertex_elems[7].instance_divisor = 0;
+ vertex_elems[7].vertex_buffer_index = 2;
+ vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ /* need versions with 4,6 and 8 vertex elems */
+ r->vertex_elems[0] = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
+ r->vertex_elems[1] = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
+ r->vertex_elems[2] = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
+
return true;
}
@@ -783,6 +837,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
for (i = 0; i < 5; ++i)
r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+ for (i = 0; i < 3; i++)
+ r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems[i]);
}
static bool
@@ -888,62 +944,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
);
}
- /* Position element */
- r->vertex_elems[0].src_offset = 0;
- r->vertex_elems[0].instance_divisor = 0;
- r->vertex_elems[0].vertex_buffer_index = 0;
- r->vertex_elems[0].nr_components = 2;
- r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* Luma, texcoord element */
- r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
- r->vertex_elems[1].instance_divisor = 0;
- r->vertex_elems[1].vertex_buffer_index = 0;
- r->vertex_elems[1].nr_components = 2;
- r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* Chroma Cr texcoord element */
- r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
- r->vertex_elems[2].instance_divisor = 0;
- r->vertex_elems[2].vertex_buffer_index = 0;
- r->vertex_elems[2].nr_components = 2;
- r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* Chroma Cb texcoord element */
- r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
- r->vertex_elems[3].instance_divisor = 0;
- r->vertex_elems[3].vertex_buffer_index = 0;
- r->vertex_elems[3].nr_components = 2;
- r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* First ref surface top field texcoord element */
- r->vertex_elems[4].src_offset = 0;
- r->vertex_elems[4].instance_divisor = 0;
- r->vertex_elems[4].vertex_buffer_index = 1;
- r->vertex_elems[4].nr_components = 2;
- r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* First ref surface bottom field texcoord element */
- r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
- r->vertex_elems[5].instance_divisor = 0;
- r->vertex_elems[5].vertex_buffer_index = 1;
- r->vertex_elems[5].nr_components = 2;
- r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* Second ref surface top field texcoord element */
- r->vertex_elems[6].src_offset = 0;
- r->vertex_elems[6].instance_divisor = 0;
- r->vertex_elems[6].vertex_buffer_index = 2;
- r->vertex_elems[6].nr_components = 2;
- r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
- /* Second ref surface bottom field texcoord element */
- r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
- r->vertex_elems[7].instance_divisor = 0;
- r->vertex_elems[7].vertex_buffer_index = 2;
- r->vertex_elems[7].nr_components = 2;
- r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
r->vs_const_buf = pipe_buffer_create
(
r->pipe->screen,
@@ -1307,7 +1307,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[0]);
r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->i_vs);
@@ -1320,7 +1320,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
r->textures.individual.ref[0] = r->past;
r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1334,7 +1334,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
r->textures.individual.ref[0] = r->past;
r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1348,7 +1348,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
r->textures.individual.ref[0] = r->future;
r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1362,7 +1362,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
r->textures.individual.ref[0] = r->future;
r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1376,7 +1376,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
r->textures.individual.ref[0] = r->past;
r->textures.individual.ref[1] = r->future;
r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
@@ -1391,7 +1391,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
- r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+ r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
r->textures.individual.ref[0] = r->past;
r->textures.individual.ref[1] = r->future;
r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f00b8c7b8b1..a11a3e7307b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -66,8 +66,8 @@ struct vl_mpeg12_mc_renderer
struct pipe_buffer *vs_const_buf;
struct pipe_buffer *fs_const_buf;
struct pipe_framebuffer_state fb_state;
- struct pipe_vertex_element vertex_elems[8];
-
+ void *vertex_elems[3];
+
union
{
void *all[5];
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 9080addba44..4608e97adbb 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -24,6 +24,7 @@ CSO objects handled by the context object:
* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state``
* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for
fragment shaders, and ``*_vs_state`` is for vertex shaders.
+* :ref:`Vertex Elements`: ``*_vertex_elements_state``
Resource Binding State
@@ -60,7 +61,6 @@ objects. They all follow simple, one-method binding calls, e.g.
not have the scissor test enabled, then the scissor bounds never need to
be set since they will not be used.
* ``set_viewport_state``
-* ``set_vertex_elements``
Clearing
diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst
new file mode 100644
index 00000000000..8e758fae103
--- /dev/null
+++ b/src/gallium/docs/source/cso/velems.rst
@@ -0,0 +1,24 @@
+.. _vertex,elements:
+
+Vertex Elements
+===============
+
+This state controls format etc. of the input attributes contained
+in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member
+for each input attribute.
+
+Members
+-------
+
+src_offset
+ The byte offset of the attribute in the buffer given by
+ vertex_buffer_index for the first vertex.
+instance_divisor
+ The instance data rate divisor, used for instancing.
+ 0 means this is per-vertex data, n means per-instance data used for
+ n consecutive instances (n > 0).
+vertex_buffer_index
+ The vertex buffer this attribute lives in. Several attributes may
+ live in the same vertex buffer.
+src_format
+ The format of the attribute data. One of the PIPE_FORMAT tokens.
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 233b91dec6b..28f80b82cd5 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -93,6 +93,11 @@ struct cell_buffer_list
struct cell_buffer_node *head;
};
+struct cell_velems_state
+{
+   unsigned count;   /* number of entries of velem[] that are in use */
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
/**
* Per-context state, subclass of pipe_context.
@@ -110,6 +115,7 @@ struct cell_context
const struct pipe_rasterizer_state *rasterizer;
const struct cell_vertex_shader_state *vs;
const struct cell_fragment_shader_state *fs;
+ const struct cell_velems_state *velems;
struct spe_function logic_op;
@@ -125,8 +131,6 @@ struct cell_context
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
uint num_vertex_buffers;
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- uint num_vertex_elements;
ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
ubyte *zsbuf_map;
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index fbe55c84721..d3efb8ecea2 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -32,24 +32,43 @@
#include "cell_context.h"
#include "cell_state.h"
+#include "util/u_memory.h"
#include "draw/draw_context.h"
-static void
-cell_set_vertex_elements(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *elements)
+void *
+cell_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
{
- struct cell_context *cell = cell_context(pipe);
-
+ struct cell_velems_state *velems;
assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
+
+void
+cell_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
+{
+ struct cell_context *cell = cell_context(pipe);
+ struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;
- memcpy(cell->vertex_element, elements, count * sizeof(elements[0]));
- cell->num_vertex_elements = count;
+ cell->velems = cell_velems;
cell->dirty |= CELL_NEW_VERTEX;
- draw_set_vertex_elements(cell->draw, count, elements);
+ draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
+}
+
+void
+cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
}
@@ -75,5 +94,7 @@ void
cell_init_vertex_functions(struct cell_context *cell)
{
cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
- cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+ cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
+ cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
+ cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
}
diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
index bb1a168ea7a..4a754465bbe 100644
--- a/src/gallium/drivers/failover/fo_context.h
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -78,6 +78,7 @@ struct failover_context {
const struct fo_state *rasterizer;
const struct fo_state *fragment_shader;
const struct fo_state *vertex_shader;
+ const struct fo_state *vertex_elements;
struct pipe_blend_color blend_color;
struct pipe_stencil_ref stencil_ref;
@@ -89,10 +90,8 @@ struct failover_context {
struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
uint num_vertex_buffers;
- uint num_vertex_elements;
void *sw_sampler_state[PIPE_MAX_SAMPLERS];
void *hw_sampler_state[PIPE_MAX_SAMPLERS];
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index 970606a3f50..0247fb803b2 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -255,9 +255,52 @@ failover_delete_vs_state(struct pipe_context *pipe,
free(state);
}
+
+
+static void *
+failover_create_vertex_elements_state( struct pipe_context *pipe,
+                                       unsigned count,
+                                       const struct pipe_vertex_element *velems )
+{
+   struct fo_state *state = malloc(sizeof(struct fo_state));
+   struct failover_context *failover = failover_context(pipe);
+   if (!state) return NULL;   /* out of memory: nothing to wrap the driver CSOs in */
+   state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems);
+   state->hw_state = failover->hw->create_vertex_elements_state(failover->hw, count, velems);
+
+   return state;
+}
+
+static void
+failover_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems )
+{
+   struct failover_context *failover = failover_context(pipe);
+   struct fo_state *state = (struct fo_state*)velems;   /* wrapper holding the per-driver handles */
+
+   failover->vertex_elements = state;
+   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
+   failover->sw->bind_vertex_elements_state( failover->sw, state ? state->sw_state : NULL );   /* each driver gets its own CSO, not the wrapper */
+   failover->hw->bind_vertex_elements_state( failover->hw, state ? state->hw_state : NULL );
+}
+
+static void
+failover_delete_vertex_elements_state( struct pipe_context *pipe,
+ void *velems )
+{
+ struct fo_state *state = (struct fo_state*)velems;
+ struct failover_context *failover = failover_context(pipe);
+
+ failover->sw->delete_vertex_elements_state(failover->sw, state->sw_state);
+ failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state);
+ state->sw_state = 0;
+ state->hw_state = 0;
+ free(state);
+}
+
static void
failover_set_polygon_stipple( struct pipe_context *pipe,
- const struct pipe_poly_stipple *stipple )
+ const struct pipe_poly_stipple *stipple )
{
struct failover_context *failover = failover_context(pipe);
@@ -490,22 +533,6 @@ failover_set_vertex_buffers(struct pipe_context *pipe,
}
-static void
-failover_set_vertex_elements(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *vertex_elements)
-{
- struct failover_context *failover = failover_context(pipe);
-
- memcpy(failover->vertex_elements, vertex_elements,
- count * sizeof(vertex_elements[0]));
-
- failover->dirty |= FO_NEW_VERTEX_ELEMENT;
- failover->num_vertex_elements = count;
- failover->sw->set_vertex_elements( failover->sw, count, vertex_elements );
- failover->hw->set_vertex_elements( failover->hw, count, vertex_elements );
-}
-
void
failover_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
@@ -543,6 +570,9 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.create_vs_state = failover_create_vs_state;
failover->pipe.bind_vs_state = failover_bind_vs_state;
failover->pipe.delete_vs_state = failover_delete_vs_state;
+ failover->pipe.create_vertex_elements_state = failover_create_vertex_elements_state;
+ failover->pipe.bind_vertex_elements_state = failover_bind_vertex_elements_state;
+ failover->pipe.delete_vertex_elements_state = failover_delete_vertex_elements_state;
failover->pipe.set_blend_color = failover_set_blend_color;
failover->pipe.set_stencil_ref = failover_set_stencil_ref;
@@ -554,6 +584,5 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures;
failover->pipe.set_viewport_state = failover_set_viewport_state;
failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
- failover->pipe.set_vertex_elements = failover_set_vertex_elements;
failover->pipe.set_constant_buffer = failover_set_constant_buffer;
}
diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c
index 5c000808425..09ca1944971 100644
--- a/src/gallium/drivers/failover/fo_state_emit.c
+++ b/src/gallium/drivers/failover/fo_state_emit.c
@@ -81,6 +81,10 @@ failover_state_emit( struct failover_context *failover )
failover->sw->bind_vs_state( failover->sw,
failover->vertex_shader->sw_state );
+ if (failover->dirty & FO_NEW_VERTEX_ELEMENT)
+ failover->sw->bind_vertex_elements_state( failover->sw,
+ failover->vertex_elements->sw_state );
+
if (failover->dirty & FO_NEW_STIPPLE)
failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple );
@@ -116,11 +120,5 @@ failover_state_emit( struct failover_context *failover )
failover->vertex_buffers );
}
- if (failover->dirty & FO_NEW_VERTEX_ELEMENT) {
- failover->sw->set_vertex_elements( failover->sw,
- failover->num_vertex_elements,
- failover->vertex_elements );
- }
-
failover->dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 499a727314d..49945376837 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -148,7 +148,7 @@ struct i915_state
/** Describes the current hardware vertex layout */
struct vertex_info vertex_info;
-
+
unsigned id; /* track lost context events */
};
@@ -187,6 +187,11 @@ struct i915_sampler_state {
unsigned maxlod;
};
+struct i915_velems_state {
+ unsigned count;
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
#define I915_MAX_TEXTURE_2D_LEVELS 11 /* max 1024x1024 */
#define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */
@@ -250,7 +255,6 @@ struct i915_context
unsigned num_samplers;
unsigned num_textures;
- unsigned num_vertex_elements;
unsigned num_vertex_buffers;
struct intel_batchbuffer *batch;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 62169918e2b..8927dfc33d4 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -742,21 +742,42 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
draw_set_vertex_buffers(i915->draw, count, buffers);
}
-static void i915_set_vertex_elements(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *elements)
+static void *
+i915_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
+{
+ struct i915_velems_state *velems;
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
+
+static void
+i915_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
{
struct i915_context *i915 = i915_context(pipe);
+ struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems;
+
/* Because we change state before the draw_set_vertex_buffers call
* we need a flush here, just to be sure.
*/
draw_flush(i915->draw);
- i915->num_vertex_elements = count;
/* pass-through to draw module */
- draw_set_vertex_elements(i915->draw, count, elements);
+ draw_set_vertex_elements(i915->draw, i915_velems->count, i915_velems->velem);
}
+static void
+i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
+}
void
i915_init_state_functions( struct i915_context *i915 )
@@ -782,6 +803,9 @@ i915_init_state_functions( struct i915_context *i915 )
i915->base.create_vs_state = i915_create_vs_state;
i915->base.bind_vs_state = i915_bind_vs_state;
i915->base.delete_vs_state = i915_delete_vs_state;
+ i915->base.create_vertex_elements_state = i915_create_vertex_elements_state;
+ i915->base.bind_vertex_elements_state = i915_bind_vertex_elements_state;
+ i915->base.delete_vertex_elements_state = i915_delete_vertex_elements_state;
i915->base.set_blend_color = i915_set_blend_color;
i915->base.set_stencil_ref = i915_set_stencil_ref;
@@ -794,5 +818,4 @@ i915_init_state_functions( struct i915_context *i915 )
i915->base.set_fragment_sampler_textures = i915_set_sampler_textures;
i915->base.set_viewport_state = i915_set_viewport_state;
i915->base.set_vertex_buffers = i915_set_vertex_buffers;
- i915->base.set_vertex_elements = i915_set_vertex_elements;
}
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 12cfa7b049c..f5b1a06576b 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -351,7 +351,7 @@ struct brw_vs_prog_data {
/* Size == 0 if output either not written, or always [0,0,0,1]
*/
-struct brw_vs_ouput_sizes {
+struct brw_vs_output_sizes {
GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
};
@@ -546,14 +546,13 @@ struct brw_context
const struct brw_blend_state *blend;
const struct brw_rasterizer_state *rast;
const struct brw_depth_stencil_state *zstencil;
+ const struct brw_vertex_element_packet *velems;
const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
unsigned num_samplers;
struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned num_vertex_elements;
unsigned num_textures;
unsigned num_vertex_buffers;
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 9f136eec71c..0820ba20a08 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -42,141 +42,6 @@
-static unsigned brw_translate_surface_format( unsigned id )
-{
- switch (id) {
- case PIPE_FORMAT_R64_FLOAT:
- return BRW_SURFACEFORMAT_R64_FLOAT;
- case PIPE_FORMAT_R64G64_FLOAT:
- return BRW_SURFACEFORMAT_R64G64_FLOAT;
- case PIPE_FORMAT_R64G64B64_FLOAT:
- return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
- case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
-
- case PIPE_FORMAT_R32_FLOAT:
- return BRW_SURFACEFORMAT_R32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return BRW_SURFACEFORMAT_R32G32_FLOAT;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
-
- case PIPE_FORMAT_R32_UNORM:
- return BRW_SURFACEFORMAT_R32_UNORM;
- case PIPE_FORMAT_R32G32_UNORM:
- return BRW_SURFACEFORMAT_R32G32_UNORM;
- case PIPE_FORMAT_R32G32B32_UNORM:
- return BRW_SURFACEFORMAT_R32G32B32_UNORM;
- case PIPE_FORMAT_R32G32B32A32_UNORM:
- return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
-
- case PIPE_FORMAT_R32_USCALED:
- return BRW_SURFACEFORMAT_R32_USCALED;
- case PIPE_FORMAT_R32G32_USCALED:
- return BRW_SURFACEFORMAT_R32G32_USCALED;
- case PIPE_FORMAT_R32G32B32_USCALED:
- return BRW_SURFACEFORMAT_R32G32B32_USCALED;
- case PIPE_FORMAT_R32G32B32A32_USCALED:
- return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
-
- case PIPE_FORMAT_R32_SNORM:
- return BRW_SURFACEFORMAT_R32_SNORM;
- case PIPE_FORMAT_R32G32_SNORM:
- return BRW_SURFACEFORMAT_R32G32_SNORM;
- case PIPE_FORMAT_R32G32B32_SNORM:
- return BRW_SURFACEFORMAT_R32G32B32_SNORM;
- case PIPE_FORMAT_R32G32B32A32_SNORM:
- return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
-
- case PIPE_FORMAT_R32_SSCALED:
- return BRW_SURFACEFORMAT_R32_SSCALED;
- case PIPE_FORMAT_R32G32_SSCALED:
- return BRW_SURFACEFORMAT_R32G32_SSCALED;
- case PIPE_FORMAT_R32G32B32_SSCALED:
- return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
- case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
-
- case PIPE_FORMAT_R16_UNORM:
- return BRW_SURFACEFORMAT_R16_UNORM;
- case PIPE_FORMAT_R16G16_UNORM:
- return BRW_SURFACEFORMAT_R16G16_UNORM;
- case PIPE_FORMAT_R16G16B16_UNORM:
- return BRW_SURFACEFORMAT_R16G16B16_UNORM;
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
-
- case PIPE_FORMAT_R16_USCALED:
- return BRW_SURFACEFORMAT_R16_USCALED;
- case PIPE_FORMAT_R16G16_USCALED:
- return BRW_SURFACEFORMAT_R16G16_USCALED;
- case PIPE_FORMAT_R16G16B16_USCALED:
- return BRW_SURFACEFORMAT_R16G16B16_USCALED;
- case PIPE_FORMAT_R16G16B16A16_USCALED:
- return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
-
- case PIPE_FORMAT_R16_SNORM:
- return BRW_SURFACEFORMAT_R16_SNORM;
- case PIPE_FORMAT_R16G16_SNORM:
- return BRW_SURFACEFORMAT_R16G16_SNORM;
- case PIPE_FORMAT_R16G16B16_SNORM:
- return BRW_SURFACEFORMAT_R16G16B16_SNORM;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
-
- case PIPE_FORMAT_R16_SSCALED:
- return BRW_SURFACEFORMAT_R16_SSCALED;
- case PIPE_FORMAT_R16G16_SSCALED:
- return BRW_SURFACEFORMAT_R16G16_SSCALED;
- case PIPE_FORMAT_R16G16B16_SSCALED:
- return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
-
- case PIPE_FORMAT_R8_UNORM:
- return BRW_SURFACEFORMAT_R8_UNORM;
- case PIPE_FORMAT_R8G8_UNORM:
- return BRW_SURFACEFORMAT_R8G8_UNORM;
- case PIPE_FORMAT_R8G8B8_UNORM:
- return BRW_SURFACEFORMAT_R8G8B8_UNORM;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
- case PIPE_FORMAT_R8_USCALED:
- return BRW_SURFACEFORMAT_R8_USCALED;
- case PIPE_FORMAT_R8G8_USCALED:
- return BRW_SURFACEFORMAT_R8G8_USCALED;
- case PIPE_FORMAT_R8G8B8_USCALED:
- return BRW_SURFACEFORMAT_R8G8B8_USCALED;
- case PIPE_FORMAT_R8G8B8A8_USCALED:
- return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
-
- case PIPE_FORMAT_R8_SNORM:
- return BRW_SURFACEFORMAT_R8_SNORM;
- case PIPE_FORMAT_R8G8_SNORM:
- return BRW_SURFACEFORMAT_R8G8_SNORM;
- case PIPE_FORMAT_R8G8B8_SNORM:
- return BRW_SURFACEFORMAT_R8G8B8_SNORM;
- case PIPE_FORMAT_R8G8B8A8_SNORM:
- return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
- case PIPE_FORMAT_R8_SSCALED:
- return BRW_SURFACEFORMAT_R8_SSCALED;
- case PIPE_FORMAT_R8G8_SSCALED:
- return BRW_SURFACEFORMAT_R8G8_SSCALED;
- case PIPE_FORMAT_R8G8B8_SSCALED:
- return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
- case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
-
- default:
- assert(0);
- return 0;
- }
-}
-
static unsigned get_index_type(int type)
{
switch (type) {
@@ -315,75 +180,16 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
-
static int brw_emit_vertex_elements(struct brw_context *brw)
{
- GLuint nr = brw->curr.num_vertex_elements;
- GLuint i;
+ const struct brw_vertex_element_packet *brw_velems = brw->curr.velems;
+ unsigned size = brw_velems->header.length + 2;
+ /* why is this here */
brw_emit_query_begin(brw);
- /* If the VS doesn't read any inputs (calculating vertex position from
- * a state variable for some reason, for example), emit a single pad
- * VERTEX_ELEMENT struct and bail.
- *
- * The stale VB state stays in place, but they don't do anything unless
- * a VE loads from them.
- */
- if (nr == 0) {
- BEGIN_BATCH(3, IGNORE_CLIPRECTS);
- OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
- OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
- BRW_VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
- (0 << BRW_VE0_SRC_OFFSET_SHIFT));
- OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
- (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
- (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
- (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
- ADVANCE_BATCH();
- return 0;
- }
-
- /* Now emit vertex element (VEP) state packets.
- *
- */
- BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
- OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
- for (i = 0; i < nr; i++) {
- const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
- uint32_t format = brw_translate_surface_format( input->src_format );
- uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
- uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
- uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
- uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
-
- switch (input->nr_components) {
- case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
- case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
- case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
- case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
- break;
- }
-
- OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
- BRW_VE0_VALID |
- (format << BRW_VE0_FORMAT_SHIFT) |
- (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+ brw_batchbuffer_data(brw->batch, brw_velems, size * 4, IGNORE_CLIPRECTS);
- if (BRW_IS_IGDNG(brw))
- OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
- (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
- (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
- (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
- else
- OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
- (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
- (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
- (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
- ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
- }
- ADVANCE_BATCH();
return 0;
}
@@ -396,10 +202,11 @@ static int brw_emit_vertices( struct brw_context *brw )
if (ret)
return ret;
+ /* XXX should separate this? */
ret = brw_emit_vertex_elements( brw );
if (ret)
return ret;
-
+
return 0;
}
@@ -407,7 +214,8 @@ static int brw_emit_vertices( struct brw_context *brw )
const struct brw_tracked_state brw_vertices = {
.dirty = {
.mesa = (PIPE_NEW_INDEX_RANGE |
- PIPE_NEW_VERTEX_BUFFER),
+ PIPE_NEW_VERTEX_BUFFER |
+ PIPE_NEW_VERTEX_ELEMENT),
.brw = BRW_NEW_BATCH,
.cache = 0,
},
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index e3c48e31493..d6a840857ec 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -1,22 +1,251 @@
#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
-static void brw_set_vertex_elements( struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *elements )
+
+/* Map a PIPE_FORMAT_* vertex format id onto the BRW_SURFACEFORMAT_* value
+ * of the same name. Formats not listed in the switch hit assert(0); when
+ * asserts are compiled out the function returns 0 for them.
+ */
+static unsigned brw_translate_surface_format( unsigned id )
+{
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+/* Fill *brw_velems with a VERTEX_ELEMENT packet describing the count
+ * entries of attribs.
+ *
+ * Only the fields assigned below are written; any other bits of
+ * *brw_velems keep whatever value they had on entry. header.length is
+ * set to the packet size in dwords minus two (brw_emit_vertex_elements
+ * copies header.length + 2 dwords into the batch).
+ */
+static void brw_translate_vertex_elements(struct brw_context *brw,
+ struct brw_vertex_element_packet *brw_velems,
+ const struct pipe_vertex_element *attribs,
+ unsigned count)
+{
+ unsigned i;
+
+ /* The opcode is the same for the pad packet and the regular packet. */
+ brw_velems->header.opcode = CMD_VERTEX_ELEMENT;
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), build a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (count == 0) {
+ brw_velems->header.length = 1;
+ brw_velems->ve[0].ve0.src_offset = 0;
+ brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ brw_velems->ve[0].ve0.valid = 1;
+ brw_velems->ve[0].ve0.vertex_buffer_index = 0;
+ brw_velems->ve[0].ve1.dst_offset = 0;
+ brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0;
+ brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0;
+ brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0;
+ brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+ return;
+ }
+
+
+ /* Now fill in one vertex element (VEP) entry per attribute:
+ * one header dword plus two dwords per element, minus two.
+ */
+ brw_velems->header.length = (1 + count * 2) - 2;
+ for (i = 0; i < count; i++) {
+ const struct pipe_vertex_element *input = &attribs[i];
+ unsigned nr_components = util_format_get_nr_components(input->src_format);
+
+ uint32_t format = brw_translate_surface_format( input->src_format );
+ uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+ /* Components the source format does not provide are replaced by
+ * constants: 0 for x/y/z, 1.0 for w. With four components nothing
+ * matches and every component is stored from the source.
+ */
+ switch (nr_components) {
+ case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+ case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+ case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+ break;
+ }
+
+ brw_velems->ve[i].ve0.src_offset = input->src_offset;
+ brw_velems->ve[i].ve0.src_format = format;
+ brw_velems->ve[i].ve0.valid = 1;
+ brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index;
+ brw_velems->ve[i].ve1.vfcomponent0 = comp0;
+ brw_velems->ve[i].ve1.vfcomponent1 = comp1;
+ brw_velems->ve[i].ve1.vfcomponent2 = comp2;
+ brw_velems->ve[i].ve1.vfcomponent3 = comp3;
+
+ /* dst_offset is left at 0 on IGDNG and is i * 4 on other parts. */
+ if (BRW_IS_IGDNG(brw))
+ brw_velems->ve[i].ve1.dst_offset = 0;
+ else
+ brw_velems->ve[i].ve1.dst_offset = i * 4;
+ }
+}
+
+/* Create a vertex-elements state object: a pre-built VERTEX_ELEMENT
+ * packet translated from the count entries of attribs.
+ *
+ * Returns the packet, or NULL if allocation fails. count is only
+ * checked against BRW_VEP_MAX by assert().
+ */
+static void* brw_create_vertex_elements_state( struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs )
{
+ /* note: for the brw_swtnl.c code (if ever we need draw fallback) we'd also need
+ to store the original data */
struct brw_context *brw = brw_context(pipe);
+ struct brw_vertex_element_packet *velems;
+ assert(count <= BRW_VEP_MAX);
+ /* Zero-filled allocation: the translator only writes selected bit-fields
+ * and the packet is later copied verbatim into the batch, so any bits it
+ * does not touch must not contain heap garbage.
+ */
+ velems = CALLOC_STRUCT(brw_vertex_element_packet);
+ if (velems) {
+ brw_translate_vertex_elements(brw, velems, attribs, count);
+ }
+ return velems;
+}
- memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
- brw->curr.num_vertex_elements = count;
+/* Make velems the current vertex-elements packet and flag
+ * PIPE_NEW_VERTEX_ELEMENT dirty. The pointer is stored as given,
+ * without a NULL check.
+ */
+static void brw_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_vertex_element_packet *brw_velems = (struct brw_vertex_element_packet *) velems;
+
+ brw->curr.velems = brw_velems;
brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
}
+/* Destroy a vertex-elements state object: releases the memory that
+ * velems points to. The pipe argument is not used.
+ */
+static void brw_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
+}
+
static void brw_set_vertex_buffers(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_buffer *buffers)
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
{
struct brw_context *brw = brw_context(pipe);
unsigned i;
@@ -49,7 +278,9 @@ void
brw_pipe_vertex_init( struct brw_context *brw )
{
brw->base.set_vertex_buffers = brw_set_vertex_buffers;
- brw->base.set_vertex_elements = brw_set_vertex_elements;
+ brw->base.create_vertex_elements_state = brw_create_vertex_elements_state;
+ brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state;
+ brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state;
}
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index bf10bc04de7..e97ddeb5e1c 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -28,7 +28,7 @@
* Authors:
* Keith Whitwell <[email protected]>
*/
-
+
#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H
@@ -1149,7 +1149,7 @@ struct brw_vertex_element_state
GLuint valid:1;
GLuint vertex_buffer_index:5;
} ve0;
-
+
struct
{
GLuint dst_offset:8;
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 8248b2a4132..baf0ae44016 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -377,6 +377,42 @@ identity_delete_vs_state(struct pipe_context *_pipe,
vs);
}
+
+/* Pass-through: forwards the create request unchanged to the wrapped
+ * pipe context and returns whatever that context returns.
+ */
+static void *
+identity_create_vertex_elements_state(struct pipe_context *_pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *vertex_elements)
+{
+ struct identity_context *id_pipe = identity_context(_pipe);
+ struct pipe_context *pipe = id_pipe->pipe;
+
+ return pipe->create_vertex_elements_state(pipe,
+ num_elements,
+ vertex_elements);
+}
+
+/* Pass-through: forwards the bind request unchanged to the wrapped
+ * pipe context.
+ */
+static void
+identity_bind_vertex_elements_state(struct pipe_context *_pipe,
+ void *velems)
+{
+ struct identity_context *id_pipe = identity_context(_pipe);
+ struct pipe_context *pipe = id_pipe->pipe;
+
+ pipe->bind_vertex_elements_state(pipe,
+ velems);
+}
+
+/* Pass-through: forwards the delete request unchanged to the wrapped
+ * pipe context.
+ */
+static void
+identity_delete_vertex_elements_state(struct pipe_context *_pipe,
+ void *velems)
+{
+ struct identity_context *id_pipe = identity_context(_pipe);
+ struct pipe_context *pipe = id_pipe->pipe;
+
+ pipe->delete_vertex_elements_state(pipe,
+ velems);
+}
+
static void
identity_set_blend_color(struct pipe_context *_pipe,
const struct pipe_blend_color *blend_color)
@@ -563,20 +599,6 @@ identity_set_vertex_buffers(struct pipe_context *_pipe,
num_buffers,
buffers);
}
-
-static void
-identity_set_vertex_elements(struct pipe_context *_pipe,
- unsigned num_elements,
- const struct pipe_vertex_element *vertex_elements)
-{
- struct identity_context *id_pipe = identity_context(_pipe);
- struct pipe_context *pipe = id_pipe->pipe;
-
- pipe->set_vertex_elements(pipe,
- num_elements,
- vertex_elements);
-}
-
static void
identity_surface_copy(struct pipe_context *_pipe,
struct pipe_surface *_dst,
@@ -733,6 +755,9 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.create_vs_state = identity_create_vs_state;
id_pipe->base.bind_vs_state = identity_bind_vs_state;
id_pipe->base.delete_vs_state = identity_delete_vs_state;
+ id_pipe->base.create_vertex_elements_state = identity_create_vertex_elements_state;
+ id_pipe->base.bind_vertex_elements_state = identity_bind_vertex_elements_state;
+ id_pipe->base.delete_vertex_elements_state = identity_delete_vertex_elements_state;
id_pipe->base.set_blend_color = identity_set_blend_color;
id_pipe->base.set_stencil_ref = identity_set_stencil_ref;
id_pipe->base.set_clip_state = identity_set_clip_state;
@@ -744,7 +769,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
- id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
id_pipe->base.surface_copy = identity_surface_copy;
id_pipe->base.surface_fill = identity_surface_fill;
id_pipe->base.clear = identity_clear;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index e31ae6a3fc1..d94efec16a4 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -145,6 +145,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state;
llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
+ llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state;
+ llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state;
+ llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state;
+
llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color;
llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref;
llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state;
@@ -157,7 +161,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
- llvmpipe->pipe.set_vertex_elements = llvmpipe_set_vertex_elements;
llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 955c7eb8e0e..217ec59b688 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -46,6 +46,7 @@ struct lp_fragment_shader;
struct lp_vertex_shader;
struct lp_blend_state;
struct setup_context;
+struct lp_velems_state;
struct llvmpipe_context {
struct pipe_context pipe; /**< base class */
@@ -58,6 +59,7 @@ struct llvmpipe_context {
const struct pipe_rasterizer_state *rasterizer;
struct lp_fragment_shader *fs;
const struct lp_vertex_shader *vs;
+ const struct lp_velems_state *velems;
/** Other rendering state */
struct pipe_blend_color blend_color;
@@ -71,13 +73,11 @@ struct llvmpipe_context {
struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned num_samplers;
unsigned num_textures;
unsigned num_vertex_samplers;
unsigned num_vertex_textures;
- unsigned num_vertex_elements;
unsigned num_vertex_buffers;
unsigned dirty; /**< Mask of LP_NEW_x flags */
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 9beba32271f..6dbdc195bfc 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -119,6 +119,10 @@ struct lp_vertex_shader {
struct draw_vertex_shader *draw_data;
};
+/* llvmpipe vertex-elements state object: a private copy of the vertex
+ * element array handed to llvmpipe_create_vertex_elements_state().
+ */
+struct lp_velems_state {
+ unsigned count; /* number of entries of velem[] in use */
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
void *
@@ -176,8 +180,14 @@ void *llvmpipe_create_vs_state(struct pipe_context *,
void llvmpipe_bind_vs_state(struct pipe_context *, void *);
void llvmpipe_delete_vs_state(struct pipe_context *, void *);
+void *llvmpipe_create_vertex_elements_state(struct pipe_context *,
+ unsigned count,
+ const struct pipe_vertex_element *);
+void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
void llvmpipe_set_polygon_stipple( struct pipe_context *,
- const struct pipe_poly_stipple * );
+ const struct pipe_poly_stipple * );
void llvmpipe_set_scissor_state( struct pipe_context *,
const struct pipe_scissor_state * );
@@ -194,10 +204,6 @@ llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
void llvmpipe_set_viewport_state( struct pipe_context *,
const struct pipe_viewport_state * );
-void llvmpipe_set_vertex_elements(struct pipe_context *,
- unsigned count,
- const struct pipe_vertex_element *);
-
void llvmpipe_set_vertex_buffers(struct pipe_context *,
unsigned count,
const struct pipe_vertex_buffer *);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index 57ac25ea0cb..f6427aa908e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -35,24 +35,41 @@
#include "draw/draw_context.h"
+/* Create a vertex-elements state object holding a copy of the count
+ * entries of attribs.
+ *
+ * Returns the new object, or NULL if allocation fails. count is only
+ * checked against PIPE_MAX_ATTRIBS by assert().
+ */
+void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
+{
+ struct lp_velems_state *velems;
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
+
void
-llvmpipe_set_vertex_elements(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *attribs)
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
- assert(count <= PIPE_MAX_ATTRIBS);
-
- memcpy(llvmpipe->vertex_element, attribs,
- count * sizeof(struct pipe_vertex_element));
- llvmpipe->num_vertex_elements = count;
+ llvmpipe->velems = lp_velems;
llvmpipe->dirty |= LP_NEW_VERTEX;
- draw_set_vertex_elements(llvmpipe->draw, count, attribs);
+ if (velems)
+ draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
}
+/* Destroy a vertex-elements state object: releases the memory that
+ * velems points to. The pipe argument is not used.
+ */
+void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
+}
void
llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index f7d10a591f6..b1ad686022a 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -124,7 +124,7 @@ nouveau_screen_map_flags(unsigned pipe)
if (pipe & PIPE_BUFFER_USAGE_DONTBLOCK)
flags |= NOUVEAU_BO_NOWAIT;
else
- if (pipe & 0 /*PIPE_BUFFER_USAGE_UNSYNCHRONIZED*/)
+ if (pipe & PIPE_BUFFER_USAGE_UNSYNCHRONIZED)
flags |= NOUVEAU_BO_NOSYNC;
return flags;
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
index a10114beab9..7f16e31c3f0 100644
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -88,4 +88,104 @@ static INLINE unsigned log2i(unsigned i)
return r;
}
+/* State for drawing one primitive as a series of smaller chunks.
+ * Set up with u_split_prim_init() and advanced with u_split_prim_next();
+ * priv, emit and edge are filled in by the user of the struct.
+ */
+struct u_split_prim {
+ void *priv; /* passed back as first argument of emit() and edge() */
+ void (*emit)(void *priv, unsigned start, unsigned count);
+ void (*edge)(void *priv, boolean enabled);
+
+ unsigned mode; /* primitive type used for splitting */
+ unsigned start; /* first vertex of the whole primitive */
+ unsigned p_start; /* first vertex not yet emitted */
+ unsigned p_end; /* one past the last vertex */
+
+ /* These flags must be unsigned: a signed 1-bit field cannot represent 1
+ * (storing 1 typically reads back as -1), and u_split_prim_next() adds
+ * close_first into its vertex arithmetic.
+ */
+ unsigned repeat_first:1;
+ unsigned close_first:1;
+ unsigned edgeflag_off:1;
+};
+
+/* Prepare s for splitting a draw of count vertices starting at start.
+ *
+ * PIPE_PRIM_LINE_LOOP is drawn as PIPE_PRIM_LINE_STRIP with close_first
+ * set; every other mode is stored unchanged. priv, emit and edge are not
+ * touched here.
+ */
+static INLINE void
+u_split_prim_init(struct u_split_prim *s,
+ unsigned mode, unsigned start, unsigned count)
+{
+ if (mode == PIPE_PRIM_LINE_LOOP) {
+ s->mode = PIPE_PRIM_LINE_STRIP;
+ s->close_first = 1;
+ } else {
+ s->mode = mode;
+ s->close_first = 0;
+ }
+ s->start = start;
+ s->p_start = start;
+ s->p_end = start + count;
+ s->edgeflag_off = 0;
+ s->repeat_first = 0;
+}
+
+/* Emit the next chunk of the primitive described by s, using at most
+ * max_verts vertices, through s->emit() (and s->edge() for polygons).
+ *
+ * Returns TRUE once the remaining vertices have all been emitted, FALSE
+ * if another call is needed to continue from the updated s->p_start.
+ *
+ * NOTE(review): max_verts is unsigned and is decremented below; a call
+ * with max_verts == 0 while repeat_first is set would wrap around --
+ * confirm callers always pass a large enough value.
+ */
+static INLINE boolean
+u_split_prim_next(struct u_split_prim *s, unsigned max_verts)
+{
+ int repeat = 0;
+
+ /* A previous chunk asked for the first vertex to be re-sent at the
+ * start of this one; it uses up one of the available vertices.
+ */
+ if (s->repeat_first) {
+ s->emit(s->priv, s->start, 1);
+ max_verts--;
+ if (s->edgeflag_off) {
+ s->edge(s->priv, TRUE);
+ s->edgeflag_off = FALSE;
+ }
+ }
+
+ /* Everything that is left fits: emit it, close the loop by re-sending
+ * the first vertex if requested, and report completion.
+ */
+ if (s->p_start + s->close_first + max_verts >= s->p_end) {
+ s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+ if (s->close_first)
+ s->emit(s->priv, s->start, 1);
+ return TRUE;
+ }
+
+ /* Otherwise trim max_verts to a whole number of primitives and pick how
+ * many trailing vertices (repeat) the next chunk has to start from again.
+ */
+ switch (s->mode) {
+ case PIPE_PRIM_LINES:
+ max_verts &= ~1;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ repeat = 1;
+ break;
+ case PIPE_PRIM_POLYGON:
+ /* The last vertex of the chunk is emitted separately, after
+ * edge(FALSE); the next call re-sends the first vertex and
+ * calls edge(TRUE) again.
+ */
+ max_verts--;
+ s->emit(s->priv, s->p_start, max_verts);
+ s->edge(s->priv, FALSE);
+ s->emit(s->priv, s->p_start + max_verts, 1);
+ s->p_start += max_verts;
+ s->repeat_first = TRUE;
+ s->edgeflag_off = TRUE;
+ return FALSE;
+ case PIPE_PRIM_TRIANGLES:
+ max_verts = max_verts - (max_verts % 3);
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* to ensure winding stays correct, always split
+ * on an even number of generated triangles
+ */
+ max_verts = max_verts & ~1;
+ repeat = 2;
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ s->repeat_first = TRUE;
+ repeat = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ max_verts &= ~3;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ max_verts &= ~1;
+ repeat = 2;
+ break;
+ default:
+ break;
+ }
+
+ /* Emit the chunk and step back by the vertices that must be repeated. */
+ s->emit (s->priv, s->p_start, max_verts);
+ s->p_start += (max_verts - repeat);
+ return FALSE;
+}
+
#endif
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index ea259aadf35..1786460aec1 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -107,6 +107,11 @@ struct nv30_state {
struct nouveau_stateobj *hw[NV30_STATE_MAX];
};
+/* nv30 vertex-elements state object: a private copy of the vertex
+ * element array given at creation time.
+ */
+struct nv30_vtxelt_state {
+ struct pipe_vertex_element pipe[16]; /* copied pipe_vertex_element entries */
+ unsigned num_elements; /* number of entries of pipe[] in use */
+};
+
struct nv30_context {
struct pipe_context pipe;
@@ -142,8 +147,7 @@ struct nv30_context {
unsigned dirty_samplers;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
- struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
- unsigned vtxelt_nr;
+ struct nv30_vtxelt_state *vtxelt;
};
static INLINE struct nv30_context *
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 5ef74a832dc..bfa27b632f7 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -236,5 +236,5 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
pscreen->get_tex_surface = nv30_miptree_surface_new;
pscreen->tex_surface_destroy = nv30_miptree_surface_del;
- nouveau_screen(pscreen)->texture_blanket = nv50_miptree_blanket;
+ nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket;
}
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index d911c807074..24b15a63ac4 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -669,15 +669,34 @@ nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
}
+/* Create a vertex-elements state object holding a copy of the
+ * num_elements entries of elements.
+ *
+ * Returns the new object, or NULL if allocation fails. The element
+ * count is only checked by assert().
+ */
+static void *
+nv30_vtxelts_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nv30_vtxelt_state *cso = CALLOC_STRUCT(nv30_vtxelt_state);
+
+ /* Allocation can fail; do not write through a NULL pointer. */
+ if (!cso)
+ return NULL;
+
+ assert(num_elements < 16); /* not doing fallbacks yet */
+ cso->num_elements = num_elements;
+ memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/* nv30_vtxelt_construct(cso);*/
+
+ return (void *)cso;
+}
+}
+
static void
-nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_element *ve)
+nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nv30_context *nv30 = nv30_context(pipe);
+ FREE(hwcso);
+}
- memcpy(nv30->vtxelt, ve, sizeof(*ve) * count);
- nv30->vtxelt_nr = count;
+static void
+nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv30_context *nv30 = nv30_context(pipe);
+ nv30->vtxelt = hwcso;
nv30->dirty |= NV30_NEW_ARRAYS;
/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
}
@@ -722,7 +741,10 @@ nv30_init_state_functions(struct nv30_context *nv30)
nv30->pipe.set_scissor_state = nv30_set_scissor_state;
nv30->pipe.set_viewport_state = nv30_set_viewport_state;
+ nv30->pipe.create_vertex_elements_state = nv30_vtxelts_state_create;
+ nv30->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete;
+ nv30->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind;
+
nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
- nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index e48823a9138..f3856bb5a5e 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -492,16 +492,16 @@ nv30_vbo_validate(struct nv30_context *nv30)
int hw;
vtxbuf = so_new(3, 17, 18);
- so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+ so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt->num_elements);
vtxfmt = so_new(1, 16, 0);
- so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+ so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt->num_elements);
- for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+ for (hw = 0; hw < nv30->vtxelt->num_elements; hw++) {
struct pipe_vertex_element *ve;
struct pipe_vertex_buffer *vb;
unsigned type, ncomp;
- ve = &nv30->vtxelt[hw];
+ ve = &nv30->vtxelt->pipe[hw];
vb = &nv30->vtxbuf[ve->vertex_buffer_index];
if (!vb->stride) {
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 97fb6a2ef94..2550ec654b3 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -107,6 +107,12 @@ struct nv40_state {
struct nouveau_stateobj *hw[NV40_STATE_MAX];
};
+
+/* Vertex-elements state object for nv40: a private copy of the pipe vertex
+ * elements (up to 16) and the number of entries actually in use. */
+struct nv40_vtxelt_state {
+ struct pipe_vertex_element pipe[16];
+ unsigned num_elements;
+};
+
struct nv40_context {
struct pipe_context pipe;
@@ -157,8 +163,7 @@ struct nv40_context {
unsigned dirty_samplers;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
- struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
- unsigned vtxelt_nr;
+ struct nv40_vtxelt_state *vtxelt;
};
static INLINE struct nv40_context *
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index 4f28675e64c..ee471e6ce4c 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -684,15 +684,34 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
nv40->draw_dirty |= NV40_NEW_ARRAYS;
}
+/*
+ * Create a vertex-elements state object holding a private copy of the
+ * caller's element array.
+ *
+ * Returns the new object as an opaque pointer, or NULL if the allocation
+ * fails.  At most 16 elements (the capacity of cso->pipe) are supported.
+ */
+static void *
+nv40_vtxelts_state_create(struct pipe_context *pipe,
+			  unsigned num_elements,
+			  const struct pipe_vertex_element *elements)
+{
+	struct nv40_vtxelt_state *cso = CALLOC_STRUCT(nv40_vtxelt_state);
+
+	if (!cso)
+		return NULL;
+
+	/* pipe[] has room for exactly 16 entries, so 16 itself is valid */
+	assert(num_elements <= 16); /* not doing fallbacks yet */
+	cso->num_elements = num_elements;
+	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/* nv40_vtxelt_construct(cso);*/
+
+	return (void *)cso;
+}
+
static void
-nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_element *ve)
+nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nv40_context *nv40 = nv40_context(pipe);
+ FREE(hwcso);
+}
- memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
- nv40->vtxelt_nr = count;
+static void
+nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ nv40->vtxelt = hwcso;
nv40->dirty |= NV40_NEW_ARRAYS;
nv40->draw_dirty |= NV40_NEW_ARRAYS;
}
@@ -737,7 +756,10 @@ nv40_init_state_functions(struct nv40_context *nv40)
nv40->pipe.set_scissor_state = nv40_set_scissor_state;
nv40->pipe.set_viewport_state = nv40_set_viewport_state;
+ nv40->pipe.create_vertex_elements_state = nv40_vtxelts_state_create;
+ nv40->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete;
+ nv40->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind;
+
nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
- nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 8990f303ce4..297d71f4fac 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -174,7 +174,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
- draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);
+ draw_set_vertex_elements(draw, nv40->vtxelt->num_elements, nv40->vtxelt->pipe);
}
nv40_state_do_validate(nv40, swtnl_states);
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index 7812460d2ed..fabdf4bf23b 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -493,16 +493,16 @@ nv40_vbo_validate(struct nv40_context *nv40)
int hw;
vtxbuf = so_new(3, 17, 18);
- so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+ so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt->num_elements);
vtxfmt = so_new(1, 16, 0);
- so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+ so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt->num_elements);
- for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+ for (hw = 0; hw < nv40->vtxelt->num_elements; hw++) {
struct pipe_vertex_element *ve;
struct pipe_vertex_buffer *vb;
unsigned type, ncomp;
- ve = &nv40->vtxelt[hw];
+ ve = &nv40->vtxelt->pipe[hw];
vb = &nv40->vtxbuf[ve->vertex_buffer_index];
if (!vb->stride) {
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index 612aea28a34..5d622e1c13c 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -16,6 +16,7 @@ C_SOURCES = \
nv50_surface.c \
nv50_tex.c \
nv50_transfer.c \
- nv50_vbo.c
+ nv50_vbo.c \
+ nv50_push.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
index e0b2d2880b0..8afc95c9fc6 100644
--- a/src/gallium/drivers/nv50/nv50_clear.c
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -36,7 +36,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
struct pipe_framebuffer_state *fb = &nv50->framebuffer;
unsigned mode = 0, i;
- if (!nv50_state_validate(nv50))
+ if (!nv50_state_validate(nv50, 64))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 7be12fcdef4..0eb42f323ff 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -46,43 +46,13 @@ static void
nv50_destroy(struct pipe_context *pipe)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ int i;
- if (nv50->state.fb)
- so_ref(NULL, &nv50->state.fb);
- if (nv50->state.blend)
- so_ref(NULL, &nv50->state.blend);
- if (nv50->state.blend_colour)
- so_ref(NULL, &nv50->state.blend_colour);
- if (nv50->state.zsa)
- so_ref(NULL, &nv50->state.zsa);
- if (nv50->state.rast)
- so_ref(NULL, &nv50->state.rast);
- if (nv50->state.stipple)
- so_ref(NULL, &nv50->state.stipple);
- if (nv50->state.scissor)
- so_ref(NULL, &nv50->state.scissor);
- if (nv50->state.viewport)
- so_ref(NULL, &nv50->state.viewport);
- if (nv50->state.tsc_upload)
- so_ref(NULL, &nv50->state.tsc_upload);
- if (nv50->state.tic_upload)
- so_ref(NULL, &nv50->state.tic_upload);
- if (nv50->state.vertprog)
- so_ref(NULL, &nv50->state.vertprog);
- if (nv50->state.fragprog)
- so_ref(NULL, &nv50->state.fragprog);
- if (nv50->state.geomprog)
- so_ref(NULL, &nv50->state.geomprog);
- if (nv50->state.fp_linkage)
- so_ref(NULL, &nv50->state.fp_linkage);
- if (nv50->state.gp_linkage)
- so_ref(NULL, &nv50->state.gp_linkage);
- if (nv50->state.vtxfmt)
- so_ref(NULL, &nv50->state.vtxfmt);
- if (nv50->state.vtxbuf)
- so_ref(NULL, &nv50->state.vtxbuf);
- if (nv50->state.vtxattr)
- so_ref(NULL, &nv50->state.vtxattr);
+ for (i = 0; i < 64; i++) {
+ if (!nv50->state.hw[i])
+ continue;
+ so_ref(NULL, &nv50->state.hw[i]);
+ }
draw_destroy(nv50->draw);
@@ -123,7 +93,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
screen->base.channel->user_private = nv50;
- screen->base.channel->flush_notify = nv50_state_flush_notify;
nv50_init_surface_functions(nv50);
nv50_init_state_functions(nv50);
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 044437e75fa..8793c2aac5d 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -72,6 +72,12 @@ struct nv50_sampler_stateobj {
unsigned tsc[8];
};
+/* Vertex-elements state object for nv50: a private copy of the pipe vertex
+ * elements (up to 16), the number of entries in use, and one 32-bit word
+ * per element in hw[] (presumably the pre-translated hardware format filled
+ * in by nv50_vtxelt_construct() -- confirm against nv50_vbo.c). */
+struct nv50_vtxelt_stateobj {
+ struct pipe_vertex_element pipe[16];
+ unsigned num_elements;
+ uint32_t hw[16];
+};
+
static INLINE unsigned
get_tile_height(uint32_t tile_mode)
{
@@ -117,30 +123,12 @@ nv50_surface(struct pipe_surface *pt)
}
struct nv50_state {
- unsigned dirty;
+ struct nouveau_stateobj *hw[64];
+ uint64_t hw_dirty;
- struct nouveau_stateobj *fb;
- struct nouveau_stateobj *blend;
- struct nouveau_stateobj *blend_colour;
- struct nouveau_stateobj *zsa;
- struct nouveau_stateobj *stencil_ref;
- struct nouveau_stateobj *rast;
- struct nouveau_stateobj *stipple;
- struct nouveau_stateobj *scissor;
- unsigned scissor_enabled;
- struct nouveau_stateobj *viewport;
- struct nouveau_stateobj *tsc_upload;
- struct nouveau_stateobj *tic_upload;
unsigned miptree_nr[PIPE_SHADER_TYPES];
- struct nouveau_stateobj *vertprog;
- struct nouveau_stateobj *fragprog;
- struct nouveau_stateobj *geomprog;
- struct nouveau_stateobj *fp_linkage;
- struct nouveau_stateobj *gp_linkage;
- struct nouveau_stateobj *vtxfmt;
struct nouveau_stateobj *vtxbuf;
struct nouveau_stateobj *vtxattr;
- struct nouveau_stateobj *instbuf;
unsigned vtxelt_nr;
};
@@ -169,14 +157,13 @@ struct nv50_context {
struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
- struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
- unsigned vtxelt_nr;
+ struct nv50_vtxelt_stateobj *vtxelt;
struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
unsigned sampler_nr[PIPE_SHADER_TYPES];
struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
unsigned miptree_nr[PIPE_SHADER_TYPES];
- uint16_t vbo_fifo;
+ unsigned vbo_fifo;
};
static INLINE struct nv50_context *
@@ -218,24 +205,36 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
unsigned count,
unsigned startInstance,
unsigned instanceCount);
-extern void nv50_vbo_validate(struct nv50_context *nv50);
+extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
+extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_push.c */
+extern void
+nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *,
+ unsigned idxsize, unsigned mode, unsigned start,
+ unsigned count, unsigned i_start,
+ unsigned i_count);
/* nv50_clear.c */
extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil);
/* nv50_program.c */
-extern void nv50_vertprog_validate(struct nv50_context *nv50);
-extern void nv50_fragprog_validate(struct nv50_context *nv50);
-extern void nv50_geomprog_validate(struct nv50_context *nv50);
-extern void nv50_fp_linkage_validate(struct nv50_context *nv50);
-extern void nv50_gp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_vertprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fragprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_geomprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_gp_linkage_validate(struct nv50_context *nv50);
extern void nv50_program_destroy(struct nv50_context *nv50,
struct nv50_program *p);
/* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *nv50);
-extern void nv50_state_flush_notify(struct nouveau_channel *chan);
+extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
extern void nv50_so_init_sifc(struct nv50_context *nv50,
struct nouveau_stateobj *so,
@@ -243,7 +242,8 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
unsigned offset, unsigned size);
/* nv50_tex.c */
-extern void nv50_tex_validate(struct nv50_context *);
+extern void nv50_tex_relocs(struct nv50_context *);
+extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *);
/* nv50_transfer.c */
extern void
@@ -257,4 +257,35 @@ nv50_upload_sifc(struct nv50_context *nv50,
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv);
+/*
+ * Translate a PIPE_PRIM_* primitive type into the matching
+ * NV50TCL_VERTEX_BEGIN_* value.  Unknown types are reported through
+ * NOUVEAU_ERR and drawn as points.
+ */
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+	unsigned hw_prim;
+
+	switch (mode) {
+	case PIPE_PRIM_POINTS:
+		hw_prim = NV50TCL_VERTEX_BEGIN_POINTS;
+		break;
+	case PIPE_PRIM_LINES:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINES;
+		break;
+	case PIPE_PRIM_LINE_LOOP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+		break;
+	case PIPE_PRIM_LINE_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLES;
+		break;
+	case PIPE_PRIM_TRIANGLE_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+		break;
+	case PIPE_PRIM_TRIANGLE_FAN:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+		break;
+	case PIPE_PRIM_QUADS:
+		hw_prim = NV50TCL_VERTEX_BEGIN_QUADS;
+		break;
+	case PIPE_PRIM_QUAD_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+		break;
+	case PIPE_PRIM_POLYGON:
+		hw_prim = NV50TCL_VERTEX_BEGIN_POLYGON;
+		break;
+	case PIPE_PRIM_LINES_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+		break;
+	case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+		break;
+	case PIPE_PRIM_TRIANGLES_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+		break;
+	case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+		break;
+	default:
+		/* complain, then fall back to points */
+		NOUVEAU_ERR("invalid primitive type %d\n", mode);
+		hw_prim = NV50TCL_VERTEX_BEGIN_POINTS;
+		break;
+	}
+
+	return hw_prim;
+}
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2372cbbef69..c857816b31a 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -4270,7 +4270,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
FREE(up);
}
-void
+struct nouveau_stateobj *
nv50_vertprog_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4286,6 +4286,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
+ if (!(nv50->dirty & NV50_NEW_VERTPROG))
+ return NULL;
+
so = so_new(5, 7, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4301,11 +4304,10 @@ nv50_vertprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_VP_START_ID, 1);
so_data (so, 0); /* program start offset */
- so_ref(so, &nv50->state.vertprog);
- so_ref(NULL, &so);
+ return so;
}
-void
+struct nouveau_stateobj *
nv50_fragprog_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4321,6 +4323,9 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
+ if (!(nv50->dirty & NV50_NEW_FRAGPROG))
+ return NULL;
+
so = so_new(6, 7, 2);
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4337,11 +4342,10 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.regs[3]);
so_method(so, tesla, NV50TCL_FP_START_ID, 1);
so_data (so, 0); /* program start offset */
- so_ref(so, &nv50->state.fragprog);
- so_ref(NULL, &so);
+ return so;
}
-void
+struct nouveau_stateobj *
nv50_geomprog_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4357,6 +4361,9 @@ nv50_geomprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
+ if (!(nv50->dirty & NV50_NEW_GEOMPROG))
+ return NULL;
+
so = so_new(6, 7, 2);
so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4373,8 +4380,7 @@ nv50_geomprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.vert_count);
so_method(so, tesla, NV50TCL_GP_START_ID, 1);
so_data (so, 0);
- so_ref(so, &nv50->state.geomprog);
- so_ref(NULL, &so);
+ return so;
}
static uint32_t
@@ -4454,7 +4460,7 @@ nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
return mid;
}
-void
+struct nouveau_stateobj *
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4580,8 +4586,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
- so_ref(so, &nv50->state.fp_linkage);
- so_ref(NULL, &so);
+ return so;
}
static int
@@ -4615,7 +4620,7 @@ construct_vp_gp_mapping(uint32_t *map32, int m,
return m;
}
-void
+struct nouveau_stateobj *
nv50_gp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4625,10 +4630,8 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
uint32_t map[16];
int m = 0;
- if (!gp) {
- so_ref(NULL, &nv50->state.gp_linkage);
- return;
- }
+ if (!gp)
+ return NULL;
memset(map, 0, sizeof(map));
m = construct_vp_gp_mapping(map, m, vp, gp);
@@ -4646,8 +4649,7 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
so_datap (so, map, m);
- so_ref(so, &nv50->state.gp_linkage);
- so_ref(NULL, &so);
+ return so;
}
void
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
new file mode 100644
index 00000000000..96a1f32d304
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -0,0 +1,326 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau/nouveau_util.h"
+#include "nv50_context.h"
+
+/* Per-draw state shared by the emit_* callbacks while vertex data is
+ * pushed through the command stream. */
+struct push_context {
+ struct nv50_context *nv50;
+
+ /* number of 32-bit words emitted per vertex in a VERTEX_DATA packet */
+ unsigned vtx_size;
+
+ /* mapped index buffer (NULL for non-indexed draws) and index size in
+ * bytes; the size selects emit_elt08/16/32 */
+ void *idxbuf;
+ unsigned idxsize;
+
+ /* last edgeflag value written to the hardware, and the attr[] slot the
+ * per-vertex edgeflag is read from (only used when < 16) */
+ float edgeflag;
+ int edgeflag_attr;
+
+ struct {
+ void *map; /* CPU pointer to the attribute's first element */
+ unsigned stride; /* bytes between consecutive elements */
+ unsigned divisor; /* instance divisor, 0 for per-vertex data */
+ unsigned step; /* instances counted towards the next advance */
+ void (*push)(struct nouveau_channel *, void *); /* emits one element */
+ } attr[16];
+ unsigned attr_nr;
+};
+
+/* Push one 32-bit word of attribute data. */
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+	const uint32_t *word = data;
+
+	OUT_RING(chan, word[0]);
+}
+
+/* Push two consecutive 32-bit words of attribute data. */
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+	const uint32_t *word = data;
+	unsigned c;
+
+	for (c = 0; c < 2; c++)
+		OUT_RING(chan, word[c]);
+}
+
+/* Push three consecutive 32-bit words of attribute data. */
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+	const uint32_t *word = data;
+	unsigned c;
+
+	for (c = 0; c < 3; c++)
+		OUT_RING(chan, word[c]);
+}
+
+/* Push four consecutive 32-bit words of attribute data. */
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+	const uint32_t *word = data;
+	unsigned c;
+
+	for (c = 0; c < 4; c++)
+		OUT_RING(chan, word[c]);
+}
+
+/* Push one 16-bit value, zero-extended into a single ring word. */
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+	const uint16_t *half = data;
+
+	OUT_RING(chan, half[0]);
+}
+
+/*
+ * Push three 16-bit values as two ring words: the first two packed into
+ * one word (element 0 in the low half), the third alone in the next word.
+ */
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+	uint16_t *v = data;
+
+	/* widen before shifting: a uint16_t promotes to signed int, and
+	 * shifting a value >= 0x8000 left by 16 would overflow it */
+	OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]);
+	OUT_RING(chan, v[2]);
+}
+
+/* Push one 8-bit value, zero-extended into a single ring word. */
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+	const uint8_t *byte = data;
+
+	OUT_RING(chan, byte[0]);
+}
+
+/* Push three 8-bit values packed into the low 24 bits of one ring word,
+ * element 0 in the lowest byte. */
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+	const uint8_t *byte = data;
+	uint32_t packed = byte[0];
+
+	packed |= byte[1] << 8;
+	packed |= byte[2] << 16;
+	OUT_RING(chan, packed);
+}
+
+/*
+ * Emit vertex number n.  If a per-vertex edgeflag attribute is in use
+ * (edgeflag_attr < 16) and its value differs from the last one written,
+ * EDGEFLAG_ENABLE is updated first; then every attribute of the vertex is
+ * pushed under a single VERTEX_DATA header of vtx_size words.
+ */
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	int a;
+
+	if (ctx->edgeflag_attr < 16) {
+		const int ef = ctx->edgeflag_attr;
+		float *flag = ctx->attr[ef].map + ctx->attr[ef].stride * n;
+
+		/* only touch EDGEFLAG_ENABLE when the value changes */
+		if (*flag != ctx->edgeflag) {
+			BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+			OUT_RING  (chan, *flag ? 1 : 0);
+			ctx->edgeflag = *flag;
+		}
+	}
+
+	BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
+	for (a = 0; a < ctx->attr_nr; a++) {
+		void *src = ctx->attr[a].map + ctx->attr[a].stride * n;
+
+		ctx->attr[a].push(chan, src);
+	}
+}
+
+/* u_split_prim edge callback: write 1 or 0 to EDGEFLAG_ENABLE according
+ * to `enabled`.  priv is the draw's push_context. */
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+	struct push_context *push = priv;
+	struct nouveau_grobj *tesla = push->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	unsigned value = enabled ? 1 : 0;
+
+	BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING  (chan, value);
+}
+
+/* Emit `count` vertices selected by 8-bit indices, beginning at index
+ * buffer position `start`. */
+static void
+emit_elt08(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	const uint8_t *elts = ctx->idxbuf;
+	unsigned i;
+
+	for (i = 0; i < count; i++)
+		emit_vertex(ctx, elts[start + i]);
+}
+
+/* Emit `count` vertices selected by 16-bit indices, beginning at index
+ * buffer position `start`. */
+static void
+emit_elt16(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	const uint16_t *elts = ctx->idxbuf;
+	unsigned i;
+
+	for (i = 0; i < count; i++)
+		emit_vertex(ctx, elts[start + i]);
+}
+
+/* Emit `count` vertices selected by 32-bit indices, beginning at index
+ * buffer position `start`. */
+static void
+emit_elt32(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	const uint32_t *elts = ctx->idxbuf;
+	unsigned i;
+
+	for (i = 0; i < count; i++)
+		emit_vertex(ctx, elts[start + i]);
+}
+
+/* Emit `count` consecutive, non-indexed vertices beginning at `start`. */
+static void
+emit_verts(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	unsigned i;
+
+	for (i = 0; i < count; i++)
+		emit_vertex(ctx, start + i);
+}
+
+/*
+ * Draw by pushing vertex data inline through the command stream.
+ *
+ * idxbuf/idxsize: optional index buffer and index size in bytes (NULL for
+ * a non-indexed draw); mode: PIPE_PRIM_* type; start/count: vertex (or
+ * index) range; i_start/i_count: first instance and number of instances.
+ *
+ * Returns early, without drawing, if a buffer cannot be mapped, a vertex
+ * format has an unsupported component size, or state validation fails
+ * after a flush.
+ */
+void
+nv50_push_elements_instanced(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf, unsigned idxsize,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned i_start, unsigned i_count)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ struct push_context ctx;
+ const unsigned p_overhead = 4 + /* begin/end */
+ 4; /* potential edgeflag enable/disable */
+ const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
+ 2; /* potential edgeflag modification */
+ struct u_split_prim s;
+ unsigned vtx_size;
+ boolean nzi = FALSE;
+ int i;
+
+ ctx.nv50 = nv50;
+ ctx.attr_nr = 0;
+ ctx.idxbuf = NULL;
+ ctx.vtx_size = 0;
+ /* presumably 0.5f matches neither 0.0 nor 1.0, so the first per-vertex
+ * edgeflag always gets written -- confirm */
+ ctx.edgeflag = 0.5f;
+ ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+
+ /* map vertex buffers, determine vertex size */
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo *bo = nouveau_bo(vb->buffer);
+ unsigned size, nr_components, n;
+
+ /* only elements flagged in the vbo_fifo mask are pushed */
+ if (!(nv50->vbo_fifo & (1 << i)))
+ continue;
+ n = ctx.attr_nr++;
+
+ if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+ assert(bo->map);
+ return;
+ }
+ /* NOTE(review): the pointer is kept and read after nouveau_bo_unmap();
+ * presumably the CPU mapping stays valid -- confirm */
+ ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
+ nouveau_bo_unmap(bo);
+
+ ctx.attr[n].stride = vb->stride;
+ ctx.attr[n].divisor = ve->instance_divisor;
+ if (ctx.attr[n].divisor) {
+ ctx.attr[n].step = i_start % ve->instance_divisor;
+ ctx.attr[n].map += i_start * vb->stride;
+ }
+
+ /* pick the push helper from component width and count, and add the
+ * number of ring words that helper emits to the vertex size.
+ * NOTE(review): the inner switches have no default, so .push stays
+ * unset if nr_components is outside 1..4 */
+ size = util_format_get_component_bits(ve->src_format,
+ UTIL_FORMAT_COLORSPACE_RGB, 0);
+ nr_components = util_format_get_nr_components(ve->src_format);
+ switch (size) {
+ case 8:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b08_1; break;
+ case 2: ctx.attr[n].push = emit_b16_1; break;
+ case 3: ctx.attr[n].push = emit_b08_3; break;
+ case 4: ctx.attr[n].push = emit_b32_1; break;
+ }
+ ctx.vtx_size++;
+ break;
+ case 16:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b16_1; break;
+ case 2: ctx.attr[n].push = emit_b32_1; break;
+ case 3: ctx.attr[n].push = emit_b16_3; break;
+ case 4: ctx.attr[n].push = emit_b32_2; break;
+ }
+ ctx.vtx_size += (nr_components + 1) >> 1;
+ break;
+ case 32:
+ switch (nr_components) {
+ case 1: ctx.attr[n].push = emit_b32_1; break;
+ case 2: ctx.attr[n].push = emit_b32_2; break;
+ case 3: ctx.attr[n].push = emit_b32_3; break;
+ case 4: ctx.attr[n].push = emit_b32_4; break;
+ }
+ ctx.vtx_size += nr_components;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ }
+ /* ring words budgeted per vertex: its data plus the per-vertex overhead */
+ vtx_size = ctx.vtx_size + v_overhead;
+
+ /* map index buffer, if present */
+ if (idxbuf) {
+ struct nouveau_bo *bo = nouveau_bo(idxbuf);
+
+ if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+ assert(bo->map);
+ return;
+ }
+ ctx.idxbuf = bo->map;
+ ctx.idxsize = idxsize;
+ nouveau_bo_unmap(bo);
+ }
+
+ /* choose the vertex emitter matching the index size (or none) */
+ s.priv = &ctx;
+ s.edge = emit_edgeflag;
+ if (idxbuf) {
+ if (idxsize == 1)
+ s.emit = emit_elt08;
+ else
+ if (idxsize == 2)
+ s.emit = emit_elt16;
+ else
+ s.emit = emit_elt32;
+ } else
+ s.emit = emit_verts;
+
+ /* per-instance loop */
+ /* NOTE(review): looks like this writes i_start into the AUX constant
+ * buffer before the first instance -- confirm */
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+ OUT_RING (chan, NV50_CB_AUX | (24 << 8));
+ OUT_RING (chan, i_start);
+ while (i_count--) {
+ unsigned max_verts;
+ boolean done;
+
+ /* advance instanced attributes by one element each time their step
+ * counter reaches the divisor */
+ for (i = 0; i < ctx.attr_nr; i++) {
+ if (!ctx.attr[i].divisor ||
+ ctx.attr[i].divisor != ++ctx.attr[i].step)
+ continue;
+ ctx.attr[i].step = 0;
+ ctx.attr[i].map += ctx.attr[i].stride;
+ }
+
+ u_split_prim_init(&s, mode, start, count);
+ do {
+ /* need room for the primitive overhead plus at least six
+ * vertices; otherwise flush and revalidate state */
+ if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
+ assert(0);
+ return;
+ }
+ }
+
+ /* how many vertices fit in the space that is left */
+ max_verts = AVAIL_RING(chan);
+ max_verts -= p_overhead;
+ max_verts /= vtx_size;
+
+ /* bit 28 of VERTEX_BEGIN is set for every instance after the first */
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
+ done = u_split_prim_next(&s, max_verts);
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+ } while (!done);
+
+ nzi = TRUE;
+ }
+}
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index eed6031eafa..7e2e8aa336e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -95,6 +95,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
static int
nv50_screen_get_param(struct pipe_screen *pscreen, int param)
{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 32;
@@ -132,9 +134,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case NOUVEAU_CAP_HW_VTXBUF:
- return 1;
+ return screen->force_push ? 0 : 1;
case NOUVEAU_CAP_HW_IDXBUF:
- return 1;
+ return screen->force_push ? 0 : 1;
case PIPE_CAP_INDEP_BLEND_ENABLE:
return 1;
case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -202,28 +204,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
FREE(screen);
}
-static int
-nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
- unsigned usage)
-{
- struct nv50_screen *screen = nv50_screen(pscreen);
- struct nv50_context *ctx = screen->cur_ctx;
-
- if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
- return 0;
-
- /* Our vtxbuf got mapped, it can no longer be considered part of current
- * state, remove it to avoid emitting reloc markers.
- */
- if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
- nouveau_bo(pb))) {
- so_ref(NULL, &ctx->state.vtxbuf);
- ctx->dirty |= NV50_NEW_ARRAYS;
- }
-
- return 0;
-}
-
struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
@@ -252,7 +232,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
pscreen->get_paramf = nv50_screen_get_paramf;
pscreen->is_format_supported = nv50_screen_is_format_supported;
pscreen->context_create = nv50_create;
- screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
nv50_screen_init_miptree_functions(pscreen);
nv50_transfer_init_screen_functions(pscreen);
@@ -508,10 +487,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
so_data (so, 1);
- /* activate first scissor rectangle */
- so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
- so_data (so, 1);
-
so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
so_data (so, 1); /* default edgeflag to TRUE */
@@ -520,6 +495,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref (NULL, &so);
nouveau_pushbuf_flush(chan, 0);
+ screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE);
return pscreen;
}
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 2687b721277..d1bc80cb9ed 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -28,6 +28,8 @@ struct nv50_screen {
struct nouveau_bo *tsc;
struct nouveau_stateobj *static_init;
+
+ boolean force_push;
};
static INLINE struct nv50_screen *
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 7d304907b65..b0e5552eff4 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -302,7 +302,7 @@ static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
- struct nouveau_stateobj *so = so_new(15, 21, 0);
+ struct nouveau_stateobj *so = so_new(16, 22, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_rasterizer_stateobj *rso =
CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -314,6 +314,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
* - point_sprite / sprite_coord_mode
*/
+ so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
+ so_data (so, cso->scissor);
+
so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
NV50TCL_SHADE_MODEL_SMOOTH);
@@ -720,15 +723,34 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
nv50->dirty |= NV50_NEW_ARRAYS;
}
+/*
+ * Create a vertex-elements state object: copy the caller's element array
+ * and let nv50_vtxelt_construct() fill in the rest of the object.
+ *
+ * Returns the new object as an opaque pointer, or NULL if the allocation
+ * fails.  At most 16 elements (the capacity of cso->pipe) are supported.
+ */
+static void *
+nv50_vtxelts_state_create(struct pipe_context *pipe,
+			  unsigned num_elements,
+			  const struct pipe_vertex_element *elements)
+{
+	struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj);
+
+	if (!cso)
+		return NULL;
+
+	/* pipe[] has room for exactly 16 entries, so 16 itself is valid */
+	assert(num_elements <= 16); /* not doing fallbacks yet */
+	cso->num_elements = num_elements;
+	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+	nv50_vtxelt_construct(cso);
+
+	return (void *)cso;
+}
+
static void
-nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_element *ve)
+nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ FREE(hwcso);
+}
- memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
- nv50->vtxelt_nr = count;
+static void
+nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ nv50->vtxelt = hwcso;
nv50->dirty |= NV50_NEW_ARRAYS;
}
@@ -778,7 +800,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.set_scissor_state = nv50_set_scissor_state;
nv50->pipe.set_viewport_state = nv50_set_viewport_state;
+ nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create;
+ nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete;
+ nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind;
+
nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
- nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c974cc92dcc..2c8e7ca7982 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -25,8 +25,8 @@
#include "nv50_context.h"
#include "nouveau/nouveau_stateobj.h"
-static void
-nv50_state_validate_fb(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_fb(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nouveau_stateobj *so = so_new(32, 79, 18);
@@ -167,12 +167,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data (so, w << 16);
so_data (so, h << 16);
- /* we set scissors to framebuffer size when they're 'turned off' */
- nv50->dirty |= NV50_NEW_SCISSOR;
- so_ref(NULL, &nv50->state.scissor);
-
- so_ref(so, &nv50->state.fb);
- so_ref(NULL, &so);
+ return so;
}
static void
@@ -199,263 +194,256 @@ nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
}
}
-static void
-nv50_state_emit(struct nv50_context *nv50)
+/**
+ * Validation hook for NV50_NEW_BLEND: hands back the state object stored
+ * in the currently bound blend CSO (nv50->blend->so).
+ *
+ * nv50->blend is dereferenced unconditionally.
+ */
+static struct nouveau_stateobj *
+validate_blend(struct nv50_context *nv50)
{
- struct nv50_screen *screen = nv50->screen;
- struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_stateobj *so = NULL;
+ /* so_ref() points so at the CSO's object (presumably taking a reference) */
+ so_ref(nv50->blend->so, &so);
+ return so;
+}
- /* XXX: this is racy for multiple contexts active on separate
- * threads.
- */
- if (screen->cur_ctx != nv50) {
- if (nv50->state.fb)
- nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
- if (nv50->state.blend)
- nv50->state.dirty |= NV50_NEW_BLEND;
- if (nv50->state.zsa)
- nv50->state.dirty |= NV50_NEW_ZSA;
- if (nv50->state.vertprog)
- nv50->state.dirty |= NV50_NEW_VERTPROG;
- if (nv50->state.fragprog)
- nv50->state.dirty |= NV50_NEW_FRAGPROG;
- if (nv50->state.geomprog)
- nv50->state.dirty |= NV50_NEW_GEOMPROG;
- if (nv50->state.rast)
- nv50->state.dirty |= NV50_NEW_RASTERIZER;
- if (nv50->state.blend_colour)
- nv50->state.dirty |= NV50_NEW_BLEND_COLOUR;
- if (nv50->state.stencil_ref)
- nv50->state.dirty |= NV50_NEW_STENCIL_REF;
- if (nv50->state.stipple)
- nv50->state.dirty |= NV50_NEW_STIPPLE;
- if (nv50->state.scissor)
- nv50->state.dirty |= NV50_NEW_SCISSOR;
- if (nv50->state.viewport)
- nv50->state.dirty |= NV50_NEW_VIEWPORT;
- if (nv50->state.tsc_upload)
- nv50->state.dirty |= NV50_NEW_SAMPLER;
- if (nv50->state.tic_upload)
- nv50->state.dirty |= NV50_NEW_TEXTURE;
- if (nv50->state.vtxfmt && nv50->state.vtxbuf)
- nv50->state.dirty |= NV50_NEW_ARRAYS;
- screen->cur_ctx = nv50;
- }
+/**
+ * Validation hook for NV50_NEW_ZSA: hands back the state object stored
+ * in the currently bound depth/stencil/alpha CSO (nv50->zsa->so).
+ *
+ * nv50->zsa is dereferenced unconditionally.
+ */
+static struct nouveau_stateobj *
+validate_zsa(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ /* so_ref() points so at the CSO's object (presumably taking a reference) */
+ so_ref(nv50->zsa->so, &so);
+ return so;
+}
- if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
- so_emit(chan, nv50->state.fb);
- if (nv50->state.dirty & NV50_NEW_BLEND)
- so_emit(chan, nv50->state.blend);
- if (nv50->state.dirty & NV50_NEW_ZSA)
- so_emit(chan, nv50->state.zsa);
- if (nv50->state.dirty & NV50_NEW_VERTPROG)
- so_emit(chan, nv50->state.vertprog);
- if (nv50->state.dirty & NV50_NEW_FRAGPROG)
- so_emit(chan, nv50->state.fragprog);
- if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog)
- so_emit(chan, nv50->state.geomprog);
- if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
- so_emit(chan, nv50->state.fp_linkage);
- if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG))
- && nv50->state.gp_linkage)
- so_emit(chan, nv50->state.gp_linkage);
- if (nv50->state.dirty & NV50_NEW_RASTERIZER)
- so_emit(chan, nv50->state.rast);
- if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
- so_emit(chan, nv50->state.blend_colour);
- if (nv50->state.dirty & NV50_NEW_STENCIL_REF)
- so_emit(chan, nv50->state.stencil_ref);
- if (nv50->state.dirty & NV50_NEW_STIPPLE)
- so_emit(chan, nv50->state.stipple);
- if (nv50->state.dirty & NV50_NEW_SCISSOR)
- so_emit(chan, nv50->state.scissor);
- if (nv50->state.dirty & NV50_NEW_VIEWPORT)
- so_emit(chan, nv50->state.viewport);
- if (nv50->state.dirty & NV50_NEW_SAMPLER)
- so_emit(chan, nv50->state.tsc_upload);
- if (nv50->state.dirty & NV50_NEW_TEXTURE)
- so_emit(chan, nv50->state.tic_upload);
- if (nv50->state.dirty & NV50_NEW_ARRAYS) {
- so_emit(chan, nv50->state.vtxfmt);
- so_emit(chan, nv50->state.vtxbuf);
- if (nv50->state.vtxattr)
- so_emit(chan, nv50->state.vtxattr);
- }
- nv50->state.dirty = 0;
+/**
+ * Validation hook for NV50_NEW_RASTERIZER: hands back the state object
+ * stored in the currently bound rasterizer CSO (nv50->rasterizer->so).
+ *
+ * nv50->rasterizer is dereferenced unconditionally.
+ */
+static struct nouveau_stateobj *
+validate_rast(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ /* so_ref() points so at the CSO's object (presumably taking a reference) */
+ so_ref(nv50->rasterizer->so, &so);
+ return so;
}
-void
-nv50_state_flush_notify(struct nouveau_channel *chan)
+/**
+ * Validation hook for NV50_NEW_BLEND_COLOUR: builds a new state object
+ * that writes the four components of nv50->blend_colour.color, each
+ * passed through fui(), as one 4-dword method at NV50TCL_BLEND_COLOR(0).
+ *
+ * \return the newly built state object
+ */
+static struct nouveau_stateobj *
+validate_blend_colour(struct nv50_context *nv50)
{
- struct nv50_context *nv50 = chan->user_private;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ /* presumably so_new(methods, data dwords, relocs) -- TODO confirm */
+ struct nouveau_stateobj *so = so_new(1, 4, 0);
+
+ so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+ so_data (so, fui(nv50->blend_colour.color[0]));
+ so_data (so, fui(nv50->blend_colour.color[1]));
+ so_data (so, fui(nv50->blend_colour.color[2]));
+ so_data (so, fui(nv50->blend_colour.color[3]));
+ return so;
+}
- if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
- so_emit(chan, nv50->state.tic_upload);
+/**
+ * Validation hook for NV50_NEW_STENCIL_REF: builds a new state object
+ * that writes nv50->stencil_ref.ref_value[0] to the front-face and
+ * ref_value[1] to the back-face stencil reference method.
+ *
+ * \return the newly built state object
+ */
+static struct nouveau_stateobj *
+validate_stencil_ref(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ /* Initialise so once; it used to be assigned inside its own initializer. */
+ struct nouveau_stateobj *so = so_new(2, 2, 0);
- so_emit_reloc_markers(chan, nv50->state.fb);
- so_emit_reloc_markers(chan, nv50->state.vertprog);
- so_emit_reloc_markers(chan, nv50->state.fragprog);
- so_emit_reloc_markers(chan, nv50->state.vtxbuf);
- so_emit_reloc_markers(chan, nv50->screen->static_init);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
+ so_data (so, nv50->stencil_ref.ref_value[0]);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
+ so_data (so, nv50->stencil_ref.ref_value[1]);
+ return so;
+}
- if (nv50->state.instbuf)
- so_emit_reloc_markers(chan, nv50->state.instbuf);
+/**
+ * Validation hook for NV50_NEW_STIPPLE: builds a new state object that
+ * writes the 32 words of nv50->stipple.stipple, each run through
+ * util_bswap32(), starting at NV50TCL_POLYGON_STIPPLE_PATTERN(0).
+ *
+ * \return the newly built state object
+ */
+static struct nouveau_stateobj *
+validate_stipple(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(1, 32, 0);
+ int i;
+
+ so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, util_bswap32(nv50->stipple.stipple[i]));
+ return so;
}
-boolean
-nv50_state_validate(struct nv50_context *nv50)
+/**
+ * Validation hook for NV50_NEW_SCISSOR: builds a new state object that
+ * writes two dwords at NV50TCL_SCISSOR_HORIZ(0), each packing the max
+ * coordinate into the upper 16 bits and the min into the lower bits.
+ *
+ * The rectangle is always taken from nv50->scissor; this function does
+ * not look at the rasterizer's scissor enable.
+ *
+ * \return the newly built state object
+ */
+static struct nouveau_stateobj *
+validate_scissor(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct pipe_scissor_state *s = &nv50->scissor;
struct nouveau_stateobj *so;
- unsigned i;
- if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
- nv50_state_validate_fb(nv50);
+ so = so_new(1, 2, 0);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
+ so_data (so, (s->maxx << 16) | s->minx);
+ so_data (so, (s->maxy << 16) | s->miny);
+ return so;
+}
+
+/**
+ * Validation hook for NV50_NEW_VIEWPORT: builds a new state object that
+ * writes the viewport translate and scale vectors (three components
+ * each, passed through fui()), sets NV50TCL_VIEWPORT_TRANSFORM_EN to 1,
+ * sets the view volume clip control to 0x1080 and writes 0 to method
+ * 0x0f90.
+ *
+ * \return the newly built state object
+ */
+static struct nouveau_stateobj *
+validate_viewport(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(5, 9, 0);
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
+ so_data (so, fui(nv50->viewport.translate[0]));
+ so_data (so, fui(nv50->viewport.translate[1]));
+ so_data (so, fui(nv50->viewport.translate[2]));
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
+ so_data (so, fui(nv50->viewport.scale[0]));
+ so_data (so, fui(nv50->viewport.scale[1]));
+ so_data (so, fui(nv50->viewport.scale[2]));
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+ so_data (so, 1);
+ /* 0x0000 = remove whole primitive only (xyz)
+ * 0x1018 = remove whole primitive only (xy), clamp z
+ * 0x1080 = clip primitive (xyz)
+ * 0x1098 = clip primitive (xy), clamp z
+ */
+ so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+ so_data (so, 0x1080);
+ /* no idea what 0f90 does */
+ so_method(so, tesla, 0x0f90, 1);
+ so_data (so, 0);
+
+ return so;
+}
- if (nv50->dirty & NV50_NEW_BLEND)
- so_ref(nv50->blend->so, &nv50->state.blend);
+/**
+ * Validation hook for NV50_NEW_SAMPLER: builds a new state object sized
+ * from the total sampler count over all shader types, lets
+ * nv50_validate_samplers() fill it for the vertex and fragment stages,
+ * and appends a write of 0 to method 0x1334 (TSC flush).
+ *
+ * \return the newly built state object
+ */
+static struct nouveau_stateobj *
+validate_sampler(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ unsigned nr = 0, i;
- if (nv50->dirty & NV50_NEW_ZSA)
- so_ref(nv50->zsa->so, &nv50->state.zsa);
+ /* count samplers over every shader type to size the state object */
+ for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+ nr += nv50->sampler_nr[i];
- if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
- nv50_vertprog_validate(nv50);
+ so = so_new(1 + 5 * PIPE_SHADER_TYPES,
+ 1 + 19 * PIPE_SHADER_TYPES + nr * 8,
+ PIPE_SHADER_TYPES * 2);
- if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
- nv50_fragprog_validate(nv50);
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
- if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB))
- nv50_geomprog_validate(nv50);
+ so_method(so, tesla, 0x1334, 1); /* flush TSC */
+ so_data (so, 0);
- if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
- nv50_fp_linkage_validate(nv50);
+ return so;
+}
- if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG))
- nv50_gp_linkage_validate(nv50);
+/**
+ * Validation hook for NV50_NEW_ARRAYS: hands back nv50->state.vtxbuf.
+ *
+ * Listed after nv50_vbo_validate in validate_list, which presumably is
+ * what fills nv50->state.vtxbuf -- TODO confirm. The result is whatever
+ * so_ref() leaves in so; nv50_state_validate() skips a NULL result.
+ */
+static struct nouveau_stateobj *
+validate_vtxbuf(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->state.vtxbuf, &so);
+ return so;
+}
- if (nv50->dirty & NV50_NEW_RASTERIZER)
- so_ref(nv50->rasterizer->so, &nv50->state.rast);
+/**
+ * Validation hook for NV50_NEW_ARRAYS: hands back nv50->state.vtxattr.
+ *
+ * Listed after nv50_vbo_validate in validate_list, which presumably is
+ * what fills nv50->state.vtxattr -- TODO confirm. The result is whatever
+ * so_ref() leaves in so; nv50_state_validate() skips a NULL result.
+ */
+static struct nouveau_stateobj *
+validate_vtxattr(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->state.vtxattr, &so);
+ return so;
+}
- if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
- so = so_new(1, 4, 0);
- so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
- so_data (so, fui(nv50->blend_colour.color[0]));
- so_data (so, fui(nv50->blend_colour.color[1]));
- so_data (so, fui(nv50->blend_colour.color[2]));
- so_data (so, fui(nv50->blend_colour.color[3]));
- so_ref(so, &nv50->state.blend_colour);
- so_ref(NULL, &so);
- }
+/*
+ * Table of state validation hooks, walked in order by
+ * nv50_state_validate().
+ *
+ * func   - builds and returns the state object for this entry; a NULL
+ *          return makes nv50_state_validate() skip the entry
+ * states - NV50_NEW_* bits; the hook runs when any of them is set in
+ *          nv50->dirty
+ *
+ * The position of an entry is significant: entry i is stored in
+ * nv50->state.hw[i] and tracked by bit i of nv50->state.hw_dirty, and
+ * nv50_state_validate() refers to entries 0, 3, 4 and 17 by number.
+ * Inserting or reordering entries requires updating those indices.
+ *
+ * The trailing {} entry is counted by validate_list_len; its states
+ * field is 0, so the dirty check never selects it.
+ */
+struct state_validate {
+ struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
+ unsigned states;
+} validate_list[] = {
+ { validate_fb , NV50_NEW_FRAMEBUFFER }, /* hw[0] */
+ { validate_blend , NV50_NEW_BLEND },
+ { validate_zsa , NV50_NEW_ZSA },
+ { nv50_vertprog_validate , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB }, /* hw[3] */
+ { nv50_fragprog_validate , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB }, /* hw[4] */
+ { nv50_geomprog_validate , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB },
+ { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG |
+ NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER },
+ { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG },
+ { validate_rast , NV50_NEW_RASTERIZER },
+ { validate_blend_colour , NV50_NEW_BLEND_COLOUR },
+ { validate_stencil_ref , NV50_NEW_STENCIL_REF },
+ { validate_stipple , NV50_NEW_STIPPLE },
+ { validate_scissor , NV50_NEW_SCISSOR },
+ { validate_viewport , NV50_NEW_VIEWPORT },
+ { validate_sampler , NV50_NEW_SAMPLER },
+ { nv50_tex_validate , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER },
+ { nv50_vbo_validate , NV50_NEW_ARRAYS },
+ { validate_vtxbuf , NV50_NEW_ARRAYS }, /* hw[17] */
+ { validate_vtxattr , NV50_NEW_ARRAYS },
+ {}
+};
+/* number of entries in validate_list, including the {} terminator */
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
- if (nv50->dirty & NV50_NEW_STENCIL_REF) {
- so = so_new(2, 2, 0);
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
- so_data (so, nv50->stencil_ref.ref_value[0]);
- so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
- so_data (so, nv50->stencil_ref.ref_value[1]);
- so_ref(so, &nv50->state.stencil_ref);
- so_ref(NULL, &so);
- }
+/**
+ * Validate all dirty state and emit it to the channel.
+ *
+ * Runs every validate_list hook whose state bits intersect nv50->dirty,
+ * stores each non-NULL result in nv50->state.hw[i] and marks bit i of
+ * nv50->state.hw_dirty. If another context was current on the screen,
+ * all previously built state objects are marked for emission as well.
+ * Ring space is then requested with MARK_RING() before anything is
+ * emitted.
+ *
+ * \param nv50         context to validate
+ * \param wait_dwords  extra dwords the caller needs; added to the
+ *                     MARK_RING() request
+ * \return TRUE on success, FALSE if MARK_RING() failed
+ *
+ * NOTE(review): on the FALSE path nv50->dirty has already been cleared
+ * while the hw_dirty bits stay set.
+ */
+boolean
+nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ /* the + 4 presumably covers the two 0x142c writes at the end -- TODO confirm */
+ unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4;
+ int ret, i;
- if (nv50->dirty & NV50_NEW_STIPPLE) {
- so = so_new(1, 32, 0);
- so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
- for (i = 0; i < 32; i++)
- so_data(so, util_bswap32(nv50->stipple.stipple[i]));
- so_ref(so, &nv50->state.stipple);
- so_ref(NULL, &so);
- }
+ /* pass 1: rebuild the state objects whose source state is dirty */
+ for (i = 0; i < validate_list_len; i++) {
+ struct state_validate *validate = &validate_list[i];
+ struct nouveau_stateobj *so;
- if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
- struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
- struct pipe_scissor_state *s = &nv50->scissor;
+ if (!(nv50->dirty & validate->states))
+ continue;
- if (nv50->state.scissor &&
- (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
- goto scissor_uptodate;
- nv50->state.scissor_enabled = rast->scissor;
+ so = validate->func(nv50);
+ if (!so)
+ continue;
- so = so_new(1, 2, 0);
- so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
- if (nv50->state.scissor_enabled) {
- so_data(so, (s->maxx << 16) | s->minx);
- so_data(so, (s->maxy << 16) | s->miny);
- } else {
- so_data(so, (nv50->framebuffer.width << 16));
- so_data(so, (nv50->framebuffer.height << 16));
- }
- so_ref(so, &nv50->state.scissor);
- so_ref(NULL, &so);
- nv50->state.dirty |= NV50_NEW_SCISSOR;
- }
-scissor_uptodate:
-
- if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
- if (nv50->state.viewport &&
- !(nv50->dirty & NV50_NEW_VIEWPORT))
- goto viewport_uptodate;
-
- so = so_new(5, 9, 0);
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
- so_data (so, fui(nv50->viewport.translate[0]));
- so_data (so, fui(nv50->viewport.translate[1]));
- so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
- so_data (so, fui(nv50->viewport.scale[0]));
- so_data (so, fui(nv50->viewport.scale[1]));
- so_data (so, fui(nv50->viewport.scale[2]));
-
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
- so_data (so, 1);
- /* 0x0000 = remove whole primitive only (xyz)
- * 0x1018 = remove whole primitive only (xy), clamp z
- * 0x1080 = clip primitive (xyz)
- * 0x1098 = clip primitive (xy), clamp z
- */
- so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
- so_data (so, 0x1080);
- /* no idea what 0f90 does */
- so_method(so, tesla, 0x0f90, 1);
- so_data (so, 0);
+ nr_dwords += (so->total + so->cur);
+ nr_relocs += so->cur_reloc;
- so_ref(so, &nv50->state.viewport);
+ /* hand the object over to hw[i], then drop the local pointer */
+ so_ref(so, &nv50->state.hw[i]);
so_ref(NULL, &so);
- nv50->state.dirty |= NV50_NEW_VIEWPORT;
+ nv50->state.hw_dirty |= (1 << i);
}
-viewport_uptodate:
-
- if (nv50->dirty & NV50_NEW_SAMPLER) {
- unsigned nr = 0;
-
- for (i = 0; i < PIPE_SHADER_TYPES; ++i)
- nr += nv50->sampler_nr[i];
+ nv50->dirty = 0;
- so = so_new(1 + 5 * PIPE_SHADER_TYPES,
- 1 + 19 * PIPE_SHADER_TYPES + nr * 8,
- PIPE_SHADER_TYPES * 2);
+ /* another context was current: queue every state object we hold
+ * that is not already queued
+ */
+ if (nv50->screen->cur_ctx != nv50) {
+ for (i = 0; i < validate_list_len; i++) {
+ if (!nv50->state.hw[i] ||
+ (nv50->state.hw_dirty & (1 << i)))
+ continue;
- nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
- nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
+ nr_dwords += (nv50->state.hw[i]->total +
+ nv50->state.hw[i]->cur);
+ nr_relocs += nv50->state.hw[i]->cur_reloc;
+ nv50->state.hw_dirty |= (1 << i);
+ }
- so_method(so, tesla, 0x1334, 1); /* flush TSC */
- so_data (so, 0);
+ nv50->screen->cur_ctx = nv50;
+ }
- so_ref(so, &nv50->state.tsc_upload);
- so_ref(NULL, &so);
+ /* request ring space for everything counted above before emitting */
+ ret = MARK_RING(chan, nr_dwords, nr_relocs);
+ if (ret) {
+ debug_printf("MARK_RING(%d, %d) failed: %d\n",
+ nr_dwords, nr_relocs, ret);
+ return FALSE;
}
- if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER))
- nv50_tex_validate(nv50);
+ /* pass 2: emit queued objects in table order, lowest set bit first */
+ while (nv50->state.hw_dirty) {
+ i = ffs(nv50->state.hw_dirty) - 1;
+ nv50->state.hw_dirty &= ~(1 << i);
- if (nv50->dirty & NV50_NEW_ARRAYS)
- nv50_vbo_validate(nv50);
+ so_emit(chan, nv50->state.hw[i]);
+ }
- nv50->state.dirty |= nv50->dirty;
- nv50->dirty = 0;
- nv50_state_emit(nv50);
+ /* Yes, really, we need to do this. If a buffer that is referenced
+ * on the hardware isn't part of changed state above, without doing
+ * this the kernel is given no clue that the buffer is being used
+ * still. This can cause all sorts of fun issues.
+ */
+ nv50_tex_relocs(nv50);
+ /* the numeric indices below are positions in validate_list */
+ so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
+ so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
+ so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
+ so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
+ so_emit_reloc_markers(chan, nv50->screen->static_init);
+ /* No idea.. */
+ BEGIN_RING(chan, tesla, 0x142c, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, tesla, 0x142c, 1);
+ OUT_RING (chan, 0);
return TRUE;
}
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index de0560e20cd..4c48b12cd87 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -24,6 +24,7 @@
#include "nv50_texture.h"
#include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_reloc.h"
#include "util/u_format.h"
@@ -195,6 +196,35 @@ nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
}
+/**
+ * Emit a relocation for every bound fragment and vertex texture.
+ *
+ * For each non-NULL nv50->miptree[p][unit], nouveau_reloc_emit() is
+ * called against nv50->screen->tic at offset ((p * 32) + unit) * 32,
+ * naming the miptree's buffer object with VRAM | LOW | RD flags.
+ * Fragment textures are handled first, then vertex textures; no other
+ * shader type is visited.
+ */
void
+nv50_tex_relocs(struct nv50_context *nv50)
+{
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
+ int p, unit;
+
+ p = PIPE_SHADER_FRAGMENT;
+ for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+ if (!nv50->miptree[p][unit])
+ continue;
+ nouveau_reloc_emit(chan, nv50->screen->tic,
+ ((p * 32) + unit) * 32, NULL,
+ nv50->miptree[p][unit]->base.bo, 0, 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_RD, 0, 0);
+ }
+
+ p = PIPE_SHADER_VERTEX;
+ for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+ if (!nv50->miptree[p][unit])
+ continue;
+ nouveau_reloc_emit(chan, nv50->screen->tic,
+ ((p * 32) + unit) * 32, NULL,
+ nv50->miptree[p][unit]->base.bo, 0, 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_RD, 0, 0);
+ }
+}
+
+struct nouveau_stateobj *
nv50_tex_validate(struct nv50_context *nv50)
{
struct nouveau_stateobj *so;
@@ -217,12 +247,11 @@ nv50_tex_validate(struct nv50_context *nv50)
so_ref(NULL, &so);
NOUVEAU_ERR("failed tex validate\n");
- return;
+ return NULL;
}
so_method(so, tesla, 0x1330, 1); /* flush TIC */
so_data (so, 0);
- so_ref(so, &nv50->state.tic_upload);
- so_ref(NULL, &so);
+ return so;
}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 1c8ee0b9adf..6b9c1ee231e 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -25,53 +25,9 @@
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
-static boolean
-nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
-
-static boolean
-nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
-
-#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
-
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
- switch (mode) {
- case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
- case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
- case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
- case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
- case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
- case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
- case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
- case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
- case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
- case PIPE_PRIM_LINES_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
- default:
- break;
- }
-
- NOUVEAU_ERR("invalid primitive type %d\n", mode);
- return NV50TCL_VERTEX_BEGIN_POINTS;
-}
-
static INLINE uint32_t
nv50_vbo_type_to_hw(enum pipe_format format)
{
@@ -139,15 +95,16 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
uint32_t hw_type, hw_size;
enum pipe_format pf = ve->src_format;
const struct util_format_description *desc;
- unsigned size;
+ unsigned size, nr_components;
desc = util_format_description(pf);
assert(desc);
size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+ nr_components = util_format_get_nr_components(pf);
hw_type = nv50_vbo_type_to_hw(pf);
- hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
+ hw_size = nv50_vbo_size_to_hw(size, nr_components);
if (!hw_type || !hw_size) {
NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
@@ -161,250 +118,58 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
return (hw_type | hw_size);
}
-/* For instanced drawing from user buffers, hitting the FIFO repeatedly
- * with the same vertex data is probably worse than uploading all data.
- */
-static boolean
-nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
-{
- struct nv50_screen *nscreen = nv50->screen;
- struct pipe_screen *pscreen = &nscreen->base.base;
- struct pipe_buffer *buf = nscreen->strm_vbuf[i];
- struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
- uint8_t *src;
- unsigned size = align(vb->buffer->size, 4096);
-
- if (buf && buf->size < size)
- pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
-
- if (!nscreen->strm_vbuf[i]) {
- nscreen->strm_vbuf[i] = pipe_buffer_create(
- pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
- buf = nscreen->strm_vbuf[i];
- }
-
- src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
- if (!src)
- return FALSE;
- src += vb->buffer_offset;
-
- size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
- if (vb->buffer_offset + size > vb->buffer->size)
- size = vb->buffer->size - vb->buffer_offset;
-
- pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
- pipe_buffer_unmap(pscreen, vb->buffer);
-
- vb->buffer = buf; /* don't pipe_reference, this is a private copy */
- return TRUE;
-}
-
-static void
-nv50_upload_user_vbufs(struct nv50_context *nv50)
-{
- unsigned i;
-
- if (nv50->vbo_fifo)
- nv50->dirty |= NV50_NEW_ARRAYS;
- if (!(nv50->dirty & NV50_NEW_ARRAYS))
- return;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i) {
- if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
- continue;
- nv50_upload_vtxbuf(nv50, i);
- }
-}
-
-static void
-nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- float v[4];
-
- util_format_read_4f(nv50->vtxelt[i].src_format,
- v, 0, data, 0, 0, 0, 1, 1);
-
- switch (nv50->vtxelt[i].nr_components) {
- case 4:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
- OUT_RINGf (chan, v[0]);
- OUT_RINGf (chan, v[1]);
- OUT_RINGf (chan, v[2]);
- OUT_RINGf (chan, v[3]);
- break;
- case 3:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
- OUT_RINGf (chan, v[0]);
- OUT_RINGf (chan, v[1]);
- OUT_RINGf (chan, v[2]);
- break;
- case 2:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
- OUT_RINGf (chan, v[0]);
- OUT_RINGf (chan, v[1]);
- break;
- case 1:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
- OUT_RINGf (chan, v[0]);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static unsigned
-init_per_instance_arrays_immd(struct nv50_context *nv50,
- unsigned startInstance,
- unsigned pos[16], unsigned step[16])
-{
- struct nouveau_bo *bo;
- unsigned i, b, count = 0;
-
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
- continue;
- ++count;
- b = nv50->vtxelt[i].vertex_buffer_index;
-
- pos[i] = nv50->vtxelt[i].src_offset +
- nv50->vtxbuf[b].buffer_offset +
- startInstance * nv50->vtxbuf[b].stride;
- step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
- if (!bo->map)
- nouveau_bo_map(bo, NOUVEAU_BO_RD);
-
- nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
- }
-
- return count;
-}
-
-static unsigned
-init_per_instance_arrays(struct nv50_context *nv50,
- unsigned startInstance,
- unsigned pos[16], unsigned step[16])
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
+/*
+ * Per-vertex-element bookkeeping for instanced draws; filled in by
+ * instance_init() and advanced by instance_step(). Only elements with a
+ * non-zero divisor have the other fields set.
+ */
+struct instance {
struct nouveau_bo *bo;
- struct nouveau_stateobj *so;
- unsigned i, b, count = 0;
- const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
- if (nv50->vbo_fifo)
- return init_per_instance_arrays_immd(nv50, startInstance,
- pos, step);
-
- so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
-
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
- continue;
- ++count;
- b = nv50->vtxelt[i].vertex_buffer_index;
-
- pos[i] = nv50->vtxelt[i].src_offset +
- nv50->vtxbuf[b].buffer_offset +
- startInstance * nv50->vtxbuf[b].stride;
-
- if (!startInstance) {
- step[i] = 0;
- continue;
- }
- step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
- so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
- }
-
- if (count && startInstance) {
- so_ref (so, &nv50->state.instbuf); /* for flush notify */
- so_emit(chan, nv50->state.instbuf);
- }
- so_ref (NULL, &so);
-
- return count;
-}
+ unsigned delta; /* offset into bo emitted as the array start */
+ unsigned stride; /* vertex buffer stride; added to delta on wrap */
+ unsigned step; /* instances counted since delta last advanced */
+ unsigned divisor; /* element's instance_divisor; 0 = not per-instance */
+};
+/**
+ * Fill the per-element instancing state for a draw.
+ *
+ * For every element of the bound vertex-elements state, a[i].divisor is
+ * set from the element's instance_divisor. Elements with a non-zero
+ * divisor also get their buffer object, stride, initial step
+ * (first % divisor) and initial offset
+ * (buffer_offset + src_offset + first * stride).
+ *
+ * \param nv50   context whose vtxelt and vtxbuf state is read
+ * \param a      array to fill; must hold at least
+ *               nv50->vtxelt->num_elements entries
+ * \param first  index of the first instance to be drawn
+ */
static void
-step_per_instance_arrays_immd(struct nv50_context *nv50,
- unsigned pos[16], unsigned step[16])
+instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
{
- struct nouveau_bo *bo;
- unsigned i, b;
+ int i;
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
- continue;
- if (++step[i] != nv50->vtxelt[i].instance_divisor)
- continue;
- b = nv50->vtxelt[i].vertex_buffer_index;
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+ struct pipe_vertex_buffer *vb;
- step[i] = 0;
- pos[i] += nv50->vtxbuf[b].stride;
+ a[i].divisor = ve->instance_divisor;
+ if (a[i].divisor) {
+ vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+ a[i].bo = nouveau_bo(vb->buffer);
+ a[i].stride = vb->stride;
+ a[i].step = first % a[i].divisor;
+ a[i].delta = vb->buffer_offset + ve->src_offset +
+ (first * a[i].stride);
+ }
}
}
+/**
+ * Emit the array start address of every per-instance element for the
+ * current instance, then advance the bookkeeping for the next one.
+ *
+ * For each element with a non-zero divisor the current a[i].delta is
+ * written as a high/low relocation pair at
+ * NV50TCL_VERTEX_ARRAY_START_HIGH(i). Afterwards step is incremented;
+ * when it reaches the divisor it wraps to 0 and delta moves on by one
+ * stride. Writes directly to the channel, so the caller must have ring
+ * space available.
+ *
+ * \param nv50  context supplying the channel and element count
+ * \param a     per-element state prepared by instance_init()
+ */
static void
-step_per_instance_arrays(struct nv50_context *nv50,
- unsigned pos[16], unsigned step[16])
+instance_step(struct nv50_context *nv50, struct instance *a)
{
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- struct nouveau_bo *bo;
- struct nouveau_stateobj *so;
- unsigned i, b;
- const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
- if (nv50->vbo_fifo) {
- step_per_instance_arrays_immd(nv50, pos, step);
- return;
- }
-
- so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+ int i;
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ if (!a[i].divisor)
continue;
- b = nv50->vtxelt[i].vertex_buffer_index;
- if (++step[i] == nv50->vtxelt[i].instance_divisor) {
- step[i] = 0;
- pos[i] += nv50->vtxbuf[b].stride;
+ /* emit the address for this instance before advancing */
+ BEGIN_RING(chan, tesla,
+ NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+ OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+ OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+ if (++a[i].step == a[i].divisor) {
+ a[i].step = 0;
+ a[i].delta += a[i].stride;
}
-
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
- so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
}
-
- so_ref (so, &nv50->state.instbuf); /* for flush notify */
- so_ref (NULL, &so);
-
- so_emit(chan, nv50->state.instbuf);
-}
-
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
- unsigned i;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i)
- if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
- nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
}
void
@@ -415,198 +180,207 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned i, nz_divisors;
- unsigned step[16], pos[16];
-
- if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
- nv50_upload_user_vbufs(nv50);
+ struct instance a[16];
+ unsigned prim = nv50_prim(mode);
- nv50_state_validate(nv50);
+ instance_init(nv50, a, startInstance);
+ if (!nv50_state_validate(nv50, 10 + 16*3))
+ return;
- nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+ if (nv50->vbo_fifo) {
+ nv50_push_elements_instanced(pipe, NULL, 0, mode, start,
+ count, startInstance,
+ instanceCount);
+ return;
+ }
BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
OUT_RING (chan, NV50_CB_AUX | (24 << 8));
OUT_RING (chan, startInstance);
+ while (instanceCount--) {
+ if (AVAIL_RING(chan) < (7 + 16*3)) {
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, 7 + 16*3)) {
+ assert(0);
+ return;
+ }
+ }
+ instance_step(nv50, a);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- if (nv50->vbo_fifo)
- nv50_push_arrays(nv50, start, count);
- else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, prim);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
OUT_RING (chan, start);
OUT_RING (chan, count);
- }
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- for (i = 1; i < instanceCount; i++) {
- if (nz_divisors) /* any non-zero array divisors ? */
- step_per_instance_arrays(nv50, pos, step);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode) | (1 << 28));
-
- if (nv50->vbo_fifo)
- nv50_push_arrays(nv50, start, count);
- else {
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
- }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- }
- nv50_unmap_vbufs(nv50);
- so_ref(NULL, &nv50->state.instbuf);
+ prim |= (1 << 28);
+ }
}
+/**
+ * Non-instanced array draw: forwards to nv50_draw_arrays_instanced()
+ * with a start instance of 0 and an instance count of 1.
+ *
+ * \param pipe   context to draw with
+ * \param mode   primitive type
+ * \param start  first vertex
+ * \param count  number of vertices
+ */
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- boolean ret;
-
- nv50_state_validate(nv50);
-
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- if (nv50->vbo_fifo)
- ret = nv50_push_arrays(nv50, start, count);
- else {
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
- ret = TRUE;
- }
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- nv50_unmap_vbufs(nv50);
-
- /* XXX: not sure what to do if ret != TRUE: flush and retry?
- */
- assert(ret);
+ nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
}
-static INLINE boolean
-nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
- unsigned start, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
-
- map += start;
+struct inline_ctx { /* private state the inline_* callbacks receive through their priv pointer */
+ struct nv50_context *nv50;
+ void *map; /* index data; the emit callbacks cast it to uint8_t/uint16_t/uint32_t */
+};
- if (nv50->vbo_fifo)
- return nv50_push_elements_u08(nv50, map, count);
+static void
+inline_elt08(void *priv, unsigned start, unsigned count) /* emit count 8-bit indices, beginning at index start of ctx->map */
+{
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ uint8_t *map = (uint8_t *)ctx->map + start;
if (count & 1) {
BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
- count--;
+ count &= ~1; /* the odd leading index was just sent on its own; keep the even remainder */
}
- while (count) {
- unsigned nr = count > 2046 ? 2046 : count;
- int i;
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
- for (i = 0; i < nr; i += 2)
- OUT_RING (chan, (map[i + 1] << 16) | map[i]);
+ count >>= 1; /* from here on count is the number of index pairs, one dword each */
+ if (!count)
+ return;
- count -= nr;
- map += nr;
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); /* one packet, no chunking here: the caller has to bound count */
+ while (count--) {
+ OUT_RING(chan, (map[1] << 16) | map[0]); /* two indices per dword, the second in the upper 16 bits */
+ map += 2;
}
- return TRUE;
}
-static INLINE boolean
-nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
- unsigned start, unsigned count)
+static void
+inline_elt16(void *priv, unsigned start, unsigned count) /* emit count 16-bit indices, beginning at index start of ctx->map */
{
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
-
- map += start;
-
- if (nv50->vbo_fifo)
- return nv50_push_elements_u16(nv50, map, count);
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ uint16_t *map = (uint16_t *)ctx->map + start;
if (count & 1) {
BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
+ count &= ~1; /* the odd leading index was just sent on its own; keep the even remainder */
map++;
- count--;
}
- while (count) {
- unsigned nr = count > 2046 ? 2046 : count;
- int i;
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
- for (i = 0; i < nr; i += 2)
- OUT_RING (chan, (map[i + 1] << 16) | map[i]);
+ count >>= 1; /* from here on count is the number of index pairs, one dword each */
+ if (!count)
+ return;
- count -= nr;
- map += nr;
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); /* one packet, no chunking here: the caller has to bound count */
+ while (count--) {
+ OUT_RING(chan, (map[1] << 16) | map[0]); /* two indices per dword, the second in the upper 16 bits */
+ map += 2;
}
- return TRUE;
}
-static INLINE boolean
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
- unsigned start, unsigned count)
+static void
+inline_elt32(void *priv, unsigned start, unsigned count) /* emit count 32-bit indices, beginning at index start of ctx->map */
+{
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count); /* one packet, no chunking here: the caller has to bound count */
+ OUT_RINGp (chan, (uint32_t *)ctx->map + start, count); /* presumably copies count dwords verbatim -- confirm against OUT_RINGp */
+}
+
+static void
+inline_edgeflag(void *priv, boolean enabled) /* edge callback installed in u_split_prim: writes NV50TCL_EDGEFLAG_ENABLE */
{
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+
+ BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, enabled ? 1 : 0); /* normalise the boolean to exactly 0 or 1 */
+}
+
+/* Draw indexed primitives by copying the indices from a CPU mapping of
+ * indexBuffer into the command stream, once per instance.
+ *
+ * The primitive is cut into pieces with u_split_prim so that each piece
+ * fits in the space currently available in the channel; the emit callback
+ * is chosen from indexSize (1, 2, otherwise 32-bit).
+ *
+ * The index buffer stays mapped for the whole draw and is unmapped on
+ * every exit path, including the state validation failures.
+ */
+static void
+nv50_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned startInstance, unsigned instanceCount)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct instance a[16];
+ struct inline_ctx ctx;
+ struct u_split_prim s;
+ boolean nzi = FALSE;
+ unsigned overhead;
+
+ overhead = 16*3; /* potential instance adjustments */
+ overhead += 4; /* Begin()/End() */
+ overhead += 4; /* potential edgeflag disable/reenable */
+ overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
+
+ s.priv = &ctx;
+ if (indexSize == 1)
+ s.emit = inline_elt08;
+ else
+ if (indexSize == 2)
+ s.emit = inline_elt16;
+ else
+ s.emit = inline_elt32;
+ s.edge = inline_edgeflag;
+
+ ctx.nv50 = nv50;
+ ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+ assert(ctx.map);
+ if (!ctx.map)
+ return;
- map += start;
+ instance_init(nv50, a, startInstance);
+ if (!nv50_state_validate(nv50, overhead + 6 + 3)) {
+ /* do not leak the mapping taken above */
+ pipe_buffer_unmap(pscreen, indexBuffer);
+ return;
+ }
- if (nv50->vbo_fifo)
- return nv50_push_elements_u32(nv50, map, count);
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+ OUT_RING (chan, NV50_CB_AUX | (24 << 8));
+ OUT_RING (chan, startInstance);
+ while (instanceCount--) {
+ unsigned max_verts;
+ boolean done;
+
+ u_split_prim_init(&s, mode, start, count);
+ do {
+ if (AVAIL_RING(chan) < (overhead + 6)) {
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, (overhead + 6))) {
+ assert(0);
+ /* release builds reach this: unmap before bailing out */
+ pipe_buffer_unmap(pscreen, indexBuffer);
+ return;
+ }
+ }
- while (count) {
- unsigned nr = count > 2047 ? 2047 : count;
+ /* one packet carries at most 2047 dwords; 8/16-bit indices pack two per dword */
+ max_verts = AVAIL_RING(chan) - overhead;
+ if (max_verts > 2047)
+ max_verts = 2047;
+ if (indexSize != 4)
+ max_verts <<= 1;
+ instance_step(nv50, a);
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
- OUT_RINGp (chan, map, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
+ done = u_split_prim_next(&s, max_verts);
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+ } while (!done);
- count -= nr;
- map += nr;
+ nzi = TRUE; /* every instance after the first sets bit 28 in VERTEX_BEGIN */
}
- return TRUE;
-}
-static INLINE void
-nv50_draw_elements_inline(struct nv50_context *nv50,
- void *map, unsigned indexSize,
- unsigned start, unsigned count)
-{
- switch (indexSize) {
- case 1:
- nv50_draw_elements_inline_u08(nv50, map, start, count);
- break;
- case 2:
- nv50_draw_elements_inline_u16(nv50, map, start, count);
- break;
- case 4:
- nv50_draw_elements_inline_u32(nv50, map, start, count);
- break;
- }
+ pipe_buffer_unmap(pscreen, indexBuffer);
}
void
@@ -617,49 +391,68 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
unsigned startInstance, unsigned instanceCount)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- struct pipe_screen *pscreen = pipe->screen;
- void *map;
- unsigned i, nz_divisors;
- unsigned step[16], pos[16];
+ struct instance a[16];
+ unsigned prim = nv50_prim(mode);
- map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
-
- if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
- nv50_upload_user_vbufs(nv50);
-
- nv50_state_validate(nv50);
+ instance_init(nv50, a, startInstance);
+ if (!nv50_state_validate(nv50, 13 + 16*3)) /* 3 dwords for CB_ADDR + 10 for the first instance + instance adjustments */
+ return;
- nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+ if (nv50->vbo_fifo) { /* vertex data has to be pushed through the FIFO */
+ nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
+ mode, start, count, startInstance,
+ instanceCount);
+ return;
+ } else
+ if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) { /* indices cannot be fetched from the buffer directly: inline them */
+ nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
+ mode, start, count, startInstance,
+ instanceCount);
+ return;
+ }
BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
OUT_RING (chan, NV50_CB_AUX | (24 << 8));
OUT_RING (chan, startInstance);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- for (i = 1; i < instanceCount; ++i) {
- if (nz_divisors) /* any non-zero array divisors ? */
- step_per_instance_arrays(nv50, pos, step);
+ while (instanceCount--) {
+ if (AVAIL_RING(chan) < (10 + 16*3)) { /* 10 = BEGIN(2) + 16-bit index path(6) + END(2); same amount the re-validate below reserves */
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, 10 + 16*3)) {
+ assert(0);
+ return;
+ }
+ }
+ instance_step(nv50, a);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode) | (1 << 28));
-
- nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
+ OUT_RING (chan, prim);
+ if (indexSize == 4) {
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
+ OUT_RING (chan, count);
+ nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+ start << 2, count << 2); /* byte offset and byte length of the 32-bit indices */
+ } else
+ if (indexSize == 2) {
+ unsigned vb_start = (start & ~1); /* round the range out to whole dwords (two indices each) */
+ unsigned vb_end = (start + count + 1) & ~1;
+ unsigned dwords = (vb_end - vb_start) >> 1;
+
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+ OUT_RING (chan, ((start & 1) << 31) | count); /* bit 31 is set when start is odd, low bits carry the index count */
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
+ OUT_RING (chan, dwords);
+ nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+ vb_start << 1, dwords << 2);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+ OUT_RING (chan, 0);
+ }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- }
- nv50_unmap_vbufs(nv50);
- so_ref(NULL, &nv50->state.instbuf);
+ prim |= (1 << 28); /* every instance after the first sets bit 28 in VERTEX_BEGIN */
+ }
}
void
@@ -667,51 +460,8 @@ nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_screen *pscreen = pipe->screen;
- void *map;
-
- nv50_state_validate(nv50);
-
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- if (!nv50->vbo_fifo && indexSize == 4) {
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
- OUT_RING (chan, count);
- nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
- start << 2, count << 2);
- } else
- if (!nv50->vbo_fifo && indexSize == 2) {
- unsigned vb_start = (start & ~1);
- unsigned vb_end = (start + count + 1) & ~1;
- unsigned dwords = (vb_end - vb_start) >> 1;
-
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
- OUT_RING (chan, ((start & 1) << 31) | count);
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
- OUT_RING (chan, dwords);
- nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
- vb_start << 1, dwords << 2);
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
- OUT_RING (chan, 0);
- } else {
- map = pipe_buffer_map(pscreen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- nv50_draw_elements_inline(nv50, map, indexSize, start, count);
- nv50_unmap_vbufs(nv50);
- pipe_buffer_unmap(pscreen, indexBuffer);
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
+ nv50_draw_elements_instanced(pipe, indexBuffer, indexSize, /* plain indexed draw is now the instanced draw ... */
+ mode, start, count, 0, 1); /* ... with startInstance 0 and instanceCount 1 */
}
static INLINE boolean
@@ -726,6 +476,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
float v[4];
int ret;
+ unsigned nr_components = util_format_get_nr_components(ve->src_format);
ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
if (ret)
@@ -736,9 +487,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
0, 0, 1, 1);
so = *pso;
if (!so)
- *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
+ *pso = so = so_new(nv50->vtxelt->num_elements,
+ nv50->vtxelt->num_elements * 4, 0);
- switch (ve->nr_components) {
+ switch (nr_components) {
case 4:
so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
so_data (so, fui(v[0]));
@@ -775,6 +527,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
}
void
+nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso) /* fill cso->hw[] from cso->pipe[] for all num_elements entries */
+{
+ unsigned i;
+
+ for (i = 0; i < cso->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &cso->pipe[i];
+
+ cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve); /* store the translated hw word alongside the pipe element */
+ }
+}
+
+struct nouveau_stateobj *
nv50_vbo_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -783,30 +547,31 @@ nv50_vbo_validate(struct nv50_context *nv50)
/* don't validate if Gallium took away our buffers */
if (nv50->vtxbuf_nr == 0)
- return;
- nv50->vbo_fifo = 0;
+ return NULL;
+
+ if (nv50->screen->force_push ||
+ nv50->vertprog->cfg.edgeflag_in < 16)
+ nv50->vbo_fifo = 0xffff;
- for (i = 0; i < nv50->vtxbuf_nr; ++i)
+ for (i = 0; i < nv50->vtxbuf_nr; i++) {
if (nv50->vtxbuf[i].stride &&
!(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
nv50->vbo_fifo = 0xffff;
+ }
- if (NV50_USING_LOATHED_EDGEFLAG(nv50))
- nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
-
- n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
+ n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
vtxattr = NULL;
- vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+ vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
vtxfmt = so_new(1, n_ve, 0);
so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
- for (i = 0; i < nv50->vtxelt_nr; i++) {
- struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
struct pipe_vertex_buffer *vb =
&nv50->vtxbuf[ve->vertex_buffer_index];
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
- uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);
+ uint32_t hw = nv50->vtxelt->hw[i];
if (!vb->stride &&
nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
@@ -821,13 +586,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
}
if (nv50->vbo_fifo) {
- so_data (vtxfmt, hw |
- (ve->instance_divisor ? (1 << 4) : i));
+ so_data (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
so_method(vtxbuf, tesla,
NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
continue;
}
+
so_data(vtxfmt, hw | i);
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
@@ -855,355 +620,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
}
- nv50->state.vtxelt_nr = nv50->vtxelt_nr;
+ nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
- so_ref (vtxfmt, &nv50->state.vtxfmt);
so_ref (vtxbuf, &nv50->state.vtxbuf);
so_ref (vtxattr, &nv50->state.vtxattr);
so_ref (NULL, &vtxbuf);
- so_ref (NULL, &vtxfmt);
so_ref (NULL, &vtxattr);
+ return vtxfmt;
}
-typedef void (*pfn_push)(struct nouveau_channel *, void *);
-
-struct nv50_vbo_emitctx
-{
- pfn_push push[16];
- uint8_t *map[16];
- unsigned stride[16];
- unsigned nr_ve;
- unsigned vtx_dwords;
- unsigned vtx_max;
-
- float edgeflag;
- unsigned ve_edgeflag;
-};
-
-static INLINE void
-emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
-{
- unsigned i;
-
- for (i = 0; i < emit->nr_ve; ++i) {
- emit->push[i](chan, emit->map[i]);
- emit->map[i] += emit->stride[i];
- }
-}
-
-static INLINE void
-emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
- uint32_t vi)
-{
- unsigned i;
-
- for (i = 0; i < emit->nr_ve; ++i)
- emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
-}
-
-static INLINE boolean
-nv50_map_vbufs(struct nv50_context *nv50)
-{
- int i;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i) {
- struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
- unsigned size = vb->stride * (vb->max_index + 1) + 16;
-
- if (nouveau_bo(vb->buffer)->map)
- continue;
-
- size = vb->stride * (vb->max_index + 1) + 16;
- size = MIN2(size, vb->buffer->size);
- if (!size)
- size = vb->buffer->size;
-
- if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
- 0, size, NOUVEAU_BO_RD))
- break;
- }
-
- if (i == nv50->vtxbuf_nr)
- return TRUE;
- for (; i >= 0; --i)
- nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
- return FALSE;
-}
-
-static void
-emit_b32_1(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b32_2(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
-}
-
-static void
-emit_b32_3(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
- OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b32_4(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
- OUT_RING(chan, v[2]);
- OUT_RING(chan, v[3]);
-}
-
-static void
-emit_b16_1(struct nouveau_channel *chan, void *data)
-{
- uint16_t *v = data;
-
- OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b16_3(struct nouveau_channel *chan, void *data)
-{
- uint16_t *v = data;
-
- OUT_RING(chan, (v[1] << 16) | v[0]);
- OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b08_1(struct nouveau_channel *chan, void *data)
-{
- uint8_t *v = data;
-
- OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b08_3(struct nouveau_channel *chan, void *data)
-{
- uint8_t *v = data;
-
- OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
-}
-
-static boolean
-emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
- unsigned start)
-{
- unsigned i;
-
- if (nv50_map_vbufs(nv50) == FALSE)
- return FALSE;
-
- emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
-
- emit->edgeflag = 0.5f;
- emit->nr_ve = 0;
- emit->vtx_dwords = 0;
-
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- struct pipe_vertex_element *ve;
- struct pipe_vertex_buffer *vb;
- unsigned n, size;
- const struct util_format_description *desc;
-
- ve = &nv50->vtxelt[i];
- vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
- continue;
- n = emit->nr_ve++;
-
- emit->stride[n] = vb->stride;
- emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
- vb->buffer_offset +
- (start * vb->stride + ve->src_offset);
-
- desc = util_format_description(ve->src_format);
- assert(desc);
-
- size = util_format_get_component_bits(
- ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
-
- assert(ve->nr_components > 0 && ve->nr_components <= 4);
-
- /* It shouldn't be necessary to push the implicit 1s
- * for case 3 and size 8 cases 1, 2, 3.
- */
- switch (size) {
- default:
- NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
- return FALSE;
- case 32:
- switch (ve->nr_components) {
- case 1: emit->push[n] = emit_b32_1; break;
- case 2: emit->push[n] = emit_b32_2; break;
- case 3: emit->push[n] = emit_b32_3; break;
- case 4: emit->push[n] = emit_b32_4; break;
- }
- emit->vtx_dwords += ve->nr_components;
- break;
- case 16:
- switch (ve->nr_components) {
- case 1: emit->push[n] = emit_b16_1; break;
- case 2: emit->push[n] = emit_b32_1; break;
- case 3: emit->push[n] = emit_b16_3; break;
- case 4: emit->push[n] = emit_b32_2; break;
- }
- emit->vtx_dwords += (ve->nr_components + 1) >> 1;
- break;
- case 8:
- switch (ve->nr_components) {
- case 1: emit->push[n] = emit_b08_1; break;
- case 2: emit->push[n] = emit_b16_1; break;
- case 3: emit->push[n] = emit_b08_3; break;
- case 4: emit->push[n] = emit_b32_1; break;
- }
- emit->vtx_dwords += 1;
- break;
- }
- }
-
- emit->vtx_max = 512 / emit->vtx_dwords;
- if (emit->ve_edgeflag < 16)
- emit->vtx_max = 1;
-
- return TRUE;
-}
-
-static INLINE void
-set_edgeflag(struct nouveau_channel *chan,
- struct nouveau_grobj *tesla,
- struct nv50_vbo_emitctx *emit, uint32_t index)
-{
- unsigned i = emit->ve_edgeflag;
-
- if (i < 16) {
- float f = *((float *)(emit->map[i] + index * emit->stride[i]));
-
- if (emit->edgeflag != f) {
- emit->edgeflag = f;
-
- BEGIN_RING(chan, tesla, 0x15e4, 1);
- OUT_RING (chan, f ? 1 : 0);
- }
- }
-}
-
-static boolean
-nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
- if (emit_prepare(nv50, &emit, start) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx_next(chan, &emit);
-
- count -= nr;
- }
-
- return TRUE;
-}
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
-
- if (emit_prepare(nv50, &emit, 0) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, *map);
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx(chan, &emit, *map++);
-
- count -= nr;
- }
-
- return TRUE;
-}
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
-
- if (emit_prepare(nv50, &emit, 0) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, *map);
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx(chan, &emit, *map++);
-
- count -= nr;
- }
-
- return TRUE;
-}
-
-static boolean
-nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
-
- if (emit_prepare(nv50, &emit, 0) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, *map);
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx(chan, &emit, *map++);
-
- count -= nr;
- }
-
- return TRUE;
-}
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 513cc0f5d44..b7ad6b20206 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -36,6 +36,7 @@ static void r300_blitter_save_states(struct r300_context* r300)
util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
util_blitter_save_viewport(r300->blitter, &r300->viewport);
util_blitter_save_clip(r300->blitter, &r300->clip);
+ util_blitter_save_vertex_elements(r300->blitter, r300->velems);
}
/* Clear currently bound buffers. */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e0a55323273..923e1e541ff 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -60,7 +60,6 @@ static void r300_destroy_context(struct pipe_context* context)
FREE(r300->rs_block_state.state);
FREE(r300->scissor_state.state);
FREE(r300->textures_state.state);
- FREE(r300->vertex_stream_state.state);
FREE(r300->vap_output_state.state);
FREE(r300->viewport_state.state);
FREE(r300->ztop_state.state);
@@ -147,7 +146,6 @@ static void r300_setup_atoms(struct r300_context* r300)
r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
- r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state);
r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index c2825d5f267..985e3391126 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -278,6 +278,23 @@ struct r300_texture {
enum r300_buffer_tiling microtile, macrotile;
};
+struct r300_vertex_info {
+ /* Parent class */
+ struct vertex_info vinfo;
+
+ /* R300_VAP_PROG_STREAM_CNTL_[0-7] */
+ uint32_t vap_prog_stream_cntl[8];
+ /* R300_VAP_PROG_STREAM_CNTL_EXT_[0-7] */
+ uint32_t vap_prog_stream_cntl_ext[8];
+};
+
+struct r300_vertex_element_state {
+ unsigned count; /* number of valid entries in velem[]; readers use it as their loop bound */
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+
+ struct r300_vertex_stream_state vertex_stream; /* NOTE(review): presumably the stream state derived from velem[] -- confirm */
+};
+
extern struct pipe_viewport_state r300_viewport_identity;
struct r300_context {
@@ -350,8 +367,7 @@ struct r300_context {
int vertex_buffer_count;
int vertex_buffer_max_index;
/* Vertex elements for Gallium. */
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- int vertex_element_count;
+ struct r300_vertex_element_state *velems;
/* Vertex info for Draw. */
struct vertex_info vertex_info;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 51fc590e5d9..55e9217fd32 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -757,9 +757,9 @@ void r300_emit_textures_state(struct r300_context *r300,
void r300_emit_aos(struct r300_context* r300, unsigned offset)
{
struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->vertex_element;
+ struct pipe_vertex_element *velem = r300->velems->velem;
int i;
- unsigned size1, size2, aos_count = r300->vertex_element_count;
+ unsigned size1, size2, aos_count = r300->velems->count;
unsigned packet_size = (aos_count * 3 + 1) / 2;
CS_LOCALS(r300);
@@ -1004,7 +1004,7 @@ void r300_emit_buffer_validate(struct r300_context *r300,
(struct r300_textures_state*)r300->textures_state.state;
struct r300_texture* tex;
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->vertex_element;
+ struct pipe_vertex_element *velem = r300->velems->velem;
struct pipe_buffer *pbuf;
unsigned i;
boolean invalid = FALSE;
@@ -1062,7 +1062,7 @@ validate:
}
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers) {
- for (i = 0; i < r300->vertex_element_count; i++) {
+ for (i = 0; i < r300->velems->count; i++) {
pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
if (!r300->winsys->add_buffer(r300->winsys, pbuf,
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index c8420bcdd5b..9c001ae186d 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -143,7 +143,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
{
struct pipe_vertex_element* velem;
struct pipe_vertex_buffer* vbuf;
- unsigned vertex_element_count = r300->vertex_element_count;
+ unsigned vertex_element_count = r300->velems->count;
unsigned i, v, vbi, dw, elem_offset, dwords;
/* Size of the vertex, in dwords. */
@@ -166,7 +166,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */
for (i = 0; i < vertex_element_count; i++) {
- velem = &r300->vertex_element[i];
+ velem = &r300->velems->velem[i];
offset[i] = velem->src_offset / 4;
size[i] = util_format_get_blocksize(velem->src_format) / 4;
vertex_size += size[i];
@@ -183,7 +183,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
}
}
- dwords = 10 + count * vertex_size;
+ dwords = 9 + count * vertex_size;
r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords);
r300_emit_buffer_validate(r300, FALSE, NULL);
@@ -193,8 +193,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
- OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(count - 1);
+ OUT_CS(0);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
r300_translate_primitive(mode));
@@ -202,7 +203,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
/* Emit vertices. */
for (v = 0; v < count; v++) {
for (i = 0; i < vertex_element_count; i++) {
- velem = &r300->vertex_element[i];
+ velem = &r300->velems->velem[i];
vbi = velem->vertex_buffer_index;
elem_offset = offset[i] + stride[vbi] * (v + start);
@@ -215,7 +216,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
/* Unmap buffers. */
for (i = 0; i < vertex_element_count; i++) {
- vbi = r300->vertex_element[i].vertex_buffer_index;
+ vbi = r300->velems->velem[i].vertex_buffer_index;
if (map[vbi]) {
vbuf = &r300->vertex_buffer[vbi];
@@ -238,15 +239,16 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
if (alt_num_verts) {
assert(count < (1 << 24));
- BEGIN_CS(10);
+ BEGIN_CS(9);
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
} else {
- BEGIN_CS(8);
+ BEGIN_CS(7);
}
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
- OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(count - 1);
+ OUT_CS(0);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
r300_translate_primitive(mode) |
@@ -281,15 +283,16 @@ static void r300_emit_draw_elements(struct r300_context *r300,
maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
if (alt_num_verts) {
- BEGIN_CS(16);
+ BEGIN_CS(15);
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
} else {
- BEGIN_CS(14);
+ BEGIN_CS(13);
}
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, mode));
- OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(maxIndex);
+ OUT_CS(minIndex);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
if (indexSize == 4) {
count_dwords = count;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 8c9f6046228..bd4c2766cb1 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1046,19 +1046,17 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
if (r300->draw) {
draw_flush(r300->draw);
draw_set_vertex_buffers(r300->draw, count, buffers);
- } else {
- r300->vertex_stream_state.dirty = TRUE;
}
}
static boolean r300_validate_aos(struct r300_context *r300)
{
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_vertex_element *velem = r300->vertex_element;
+ struct pipe_vertex_element *velem = r300->velems->velem;
int i;
/* Check if formats and strides are aligned to the size of DWORD. */
- for (i = 0; i < r300->vertex_element_count; i++) {
+ for (i = 0; i < r300->velems->count; i++) {
if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
return FALSE;
@@ -1067,20 +1065,209 @@ static boolean r300_validate_aos(struct r300_context *r300)
return TRUE;
}
-static void r300_set_vertex_elements(struct pipe_context* pipe,
- unsigned count,
- const struct pipe_vertex_element* elements)
+static void r300_draw_emit_attrib(struct r300_context* r300,
+ enum attrib_emit emit,
+ enum interp_mode interp,
+ int index)
{
- struct r300_context* r300 = r300_context(pipe);
+ struct r300_vertex_shader* vs = r300->vs_state.state;
+ struct tgsi_shader_info* info = &vs->info;
+ int output;
+
+ output = draw_find_shader_output(r300->draw,
+ info->output_semantic_name[index],
+ info->output_semantic_index[index]);
+ draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
+}
+
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
+{
+ struct r300_vertex_shader* vs = r300->vs_state.state;
+ struct r300_shader_semantics* vs_outputs = &vs->outputs;
+ int i, gen_count;
+
+ /* Position. */
+ if (vs_outputs->pos != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->pos);
+ } else {
+ assert(0);
+ }
+
+ /* Point size. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+ vs_outputs->psize);
+ }
+
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+ vs_outputs->color[i]);
+ }
+ }
+
+ /* XXX Back-face colors. */
+
+ /* Texture coordinates. */
+ gen_count = 0;
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->generic[i]);
+ gen_count++;
+ }
+ }
+
+ /* Fog coordinates. */
+ if (vs_outputs->fog != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->fog);
+ gen_count++;
+ }
+
+ /* XXX magic */
+ assert(gen_count <= 8);
+}
+
+/* Update the PSC tables. */
+static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+{
+ struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+ uint16_t type, swizzle;
+ enum pipe_format format;
+ unsigned i;
+
+ assert(velems->count <= 16);
+
+ /* Vertex shaders have no semantics on their inputs,
+ * so PSC should just route stuff based on the vertex elements,
+ * and not on attrib information. */
+ for (i = 0; i < velems->count; i++) {
+ format = velems->velem[i].src_format;
+
+ type = r300_translate_vertex_data_type(format) |
+ (i << R300_DST_VEC_LOC_SHIFT);
+ swizzle = r300_translate_vertex_data_swizzle(format);
+
+ if (i & 1) {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+ } else {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+ }
+ }
- memcpy(r300->vertex_element,
- elements,
- sizeof(struct pipe_vertex_element) * count);
- r300->vertex_element_count = count;
+ /* Set the last vector in the PSC. */
+ if (i) {
+ i -= 1;
+ }
+ vstream->vap_prog_stream_cntl[i >> 1] |=
+ (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+ vstream->count = (i >> 1) + 1;
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context *r300,
+ struct r300_vertex_element_state *velems)
+{
+ struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+ struct r300_vertex_shader* vs = r300->vs_state.state;
+ struct vertex_info* vinfo = &r300->vertex_info;
+ uint16_t type, swizzle;
+ enum pipe_format format;
+ unsigned i, attrib_count;
+ int* vs_output_tab = vs->stream_loc_notcl;
+
+ /* For each Draw attribute, route it to the fragment shader according
+ * to the vs_output_tab. */
+ attrib_count = vinfo->num_attribs;
+ DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+ for (i = 0; i < attrib_count; i++) {
+ DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+ " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+ vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+ vs_output_tab[i]);
+ }
+
+ for (i = 0; i < attrib_count; i++) {
+ /* Make sure we have a proper destination for our attribute. */
+ assert(vs_output_tab[i] != -1);
+
+ format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+
+ /* Obtain the type of data in this attribute. */
+ type = r300_translate_vertex_data_type(format) |
+ vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
+
+ /* Obtain the swizzle for this attribute. Note that the default
+ * swizzle in the hardware is not XYZW! */
+ swizzle = r300_translate_vertex_data_swizzle(format);
+
+ /* Add the attribute to the PSC table. */
+ if (i & 1) {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+ } else {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+ }
+ }
+
+ /* Set the last vector in the PSC. */
+ if (i) {
+ i -= 1;
+ }
+ vstream->vap_prog_stream_cntl[i >> 1] |=
+ (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+ vstream->count = (i >> 1) + 1;
+}
+
+static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
+ unsigned count,
+ const struct pipe_vertex_element* attribs)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
+ struct r300_vertex_element_state *velems;
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = CALLOC_STRUCT(r300_vertex_element_state);
+ if (velems != NULL) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
+
+ if (r300screen->caps->has_tcl) {
+ r300_vertex_psc(velems);
+ } else {
+ memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+ r300_draw_emit_all_attribs(r300);
+ draw_compute_vertex_size(&r300->vertex_info);
+ r300_swtcl_vertex_psc(r300, velems);
+ }
+ }
+ return velems;
+}
+
+static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *state)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_vertex_element_state *velems = state;
+
+ if (velems == NULL) {
+ return;
+ }
+
+ r300->velems = velems;
if (r300->draw) {
draw_flush(r300->draw);
- draw_set_vertex_elements(r300->draw, count, elements);
+ draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
}
if (!r300_validate_aos(r300)) {
@@ -1088,6 +1275,14 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
assert(0);
abort();
}
+
+ UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
+ r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+}
+
+static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
+{
+ FREE(state);
}
static void* r300_create_vs_state(struct pipe_context* pipe,
@@ -1262,7 +1457,10 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.set_viewport_state = r300_set_viewport_state;
r300->context.set_vertex_buffers = r300_set_vertex_buffers;
- r300->context.set_vertex_elements = r300_set_vertex_elements;
+
+ r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
+ r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
+ r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
r300->context.create_vs_state = r300_create_vs_state;
r300->context.bind_vs_state = r300_bind_vs_state;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index e9e40747ef1..6b9f61acd7b 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -37,187 +37,6 @@
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
-static void r300_draw_emit_attrib(struct r300_context* r300,
- enum attrib_emit emit,
- enum interp_mode interp,
- int index)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct tgsi_shader_info* info = &vs->info;
- int output;
-
- output = draw_find_shader_output(r300->draw,
- info->output_semantic_name[index],
- info->output_semantic_index[index]);
- draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
-}
-
-static void r300_draw_emit_all_attribs(struct r300_context* r300)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct r300_shader_semantics* vs_outputs = &vs->outputs;
- int i, gen_count;
-
- /* Position. */
- if (vs_outputs->pos != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->pos);
- } else {
- assert(0);
- }
-
- /* Point size. */
- if (vs_outputs->psize != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
- vs_outputs->psize);
- }
-
- /* Colors. */
- for (i = 0; i < ATTR_COLOR_COUNT; i++) {
- if (vs_outputs->color[i] != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
- vs_outputs->color[i]);
- }
- }
-
- /* XXX Back-face colors. */
-
- /* Texture coordinates. */
- gen_count = 0;
- for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
- if (vs_outputs->generic[i] != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->generic[i]);
- gen_count++;
- }
- }
-
- /* Fog coordinates. */
- if (vs_outputs->fog != ATTR_UNUSED) {
- r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
- vs_outputs->fog);
- gen_count++;
- }
-
- /* XXX magic */
- assert(gen_count <= 8);
-}
-
-/* Update the PSC tables. */
-/* XXX move this function into r300_state.c after TCL-bypass gets removed
- * XXX because this one is dependent only on vertex elements. */
-static void r300_vertex_psc(struct r300_context* r300)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct r300_vertex_stream_state *vformat =
- (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
- uint16_t type, swizzle;
- enum pipe_format format;
- unsigned i;
- int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
- int* stream_tab;
-
- memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
- stream_tab = identity;
-
- /* Vertex shaders have no semantics on their inputs,
- * so PSC should just route stuff based on the vertex elements,
- * and not on attrib information. */
- DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
- " in psc\n",
- vs->info.num_inputs,
- r300->vertex_element_count);
-
- for (i = 0; i < r300->vertex_element_count; i++) {
- format = r300->vertex_element[i].src_format;
-
- type = r300_translate_vertex_data_type(format) |
- (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
- swizzle = r300_translate_vertex_data_swizzle(format);
-
- if (i & 1) {
- vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
- vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
- } else {
- vformat->vap_prog_stream_cntl[i >> 1] |= type;
- vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
- }
- }
-
- assert(i <= 15);
-
- /* Set the last vector in the PSC. */
- if (i) {
- i -= 1;
- }
- vformat->vap_prog_stream_cntl[i >> 1] |=
- (R300_LAST_VEC << (i & 1 ? 16 : 0));
-
- vformat->count = (i >> 1) + 1;
- r300->vertex_stream_state.size = (1 + vformat->count) * 2;
-}
-
-/* Update the PSC tables for SW TCL, using Draw. */
-static void r300_swtcl_vertex_psc(struct r300_context* r300)
-{
- struct r300_vertex_shader* vs = r300->vs_state.state;
- struct r300_vertex_stream_state *vformat =
- (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
- struct vertex_info* vinfo = &r300->vertex_info;
- uint16_t type, swizzle;
- enum pipe_format format;
- unsigned i, attrib_count;
- int* vs_output_tab = vs->stream_loc_notcl;
-
- memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
- /* For each Draw attribute, route it to the fragment shader according
- * to the vs_output_tab. */
- attrib_count = vinfo->num_attribs;
- DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
- for (i = 0; i < attrib_count; i++) {
- DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
- " vs_output_tab %d\n", vinfo->attrib[i].src_index,
- vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
- vs_output_tab[i]);
- }
-
- for (i = 0; i < attrib_count; i++) {
- /* Make sure we have a proper destination for our attribute. */
- assert(vs_output_tab[i] != -1);
-
- format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
-
- /* Obtain the type of data in this attribute. */
- type = r300_translate_vertex_data_type(format) |
- vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
-
- /* Obtain the swizzle for this attribute. Note that the default
- * swizzle in the hardware is not XYZW! */
- swizzle = r300_translate_vertex_data_swizzle(format);
-
- /* Add the attribute to the PSC table. */
- if (i & 1) {
- vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
- vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
- } else {
- vformat->vap_prog_stream_cntl[i >> 1] |= type;
- vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
- }
- }
-
- /* Set the last vector in the PSC. */
- if (i) {
- i -= 1;
- }
- vformat->vap_prog_stream_cntl[i >> 1] |=
- (R300_LAST_VEC << (i & 1 ? 16 : 0));
-
- vformat->count = (i >> 1) + 1;
- r300->vertex_stream_state.size = (1 + vformat->count) * 2;
-}
-
static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
boolean swizzle_0001)
{
@@ -432,18 +251,8 @@ static void r300_update_rs_block(struct r300_context* r300,
static void r300_update_derived_shader_state(struct r300_context* r300)
{
struct r300_vertex_shader* vs = r300->vs_state.state;
- struct r300_screen* r300screen = r300_screen(r300->context.screen);
r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs);
-
- if (r300screen->caps->has_tcl) {
- r300_vertex_psc(r300);
- } else {
- memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
- r300_draw_emit_all_attribs(r300);
- draw_compute_vertex_size(&r300->vertex_info);
- r300_swtcl_vertex_psc(r300);
- }
}
static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
@@ -576,8 +385,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
void r300_update_derived_state(struct r300_context* r300)
{
- if (r300->rs_block_state.dirty ||
- r300->vertex_stream_state.dirty) { /* XXX put updating PSC out of this file */
+ if (r300->rs_block_state.dirty) {
r300_update_derived_shader_state(r300);
}
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index af7827820cc..a32924ed0a3 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -348,39 +348,12 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
return 0;
}
-/* Utility function to count the number of components in RGBAZS formats.
- * XXX should go to util or p_format.h */
-static INLINE unsigned pf_component_count(enum pipe_format format) {
- unsigned count = 0;
-
- if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
- count++;
- }
- if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) {
- count++;
- }
- if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) {
- count++;
- }
- if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) {
- count++;
- }
- if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) {
- count++;
- }
- if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) {
- count++;
- }
-
- return count;
-}
-
/* Translate pipe_formats into PSC vertex types. */
static INLINE uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
uint32_t result = 0;
const struct util_format_description *desc;
- unsigned components = pf_component_count(format);
+ unsigned components = util_format_get_nr_components(format);
desc = util_format_description(format);
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 53eecd1cbf3..30494719f7b 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -245,6 +245,10 @@ softpipe_create_context( struct pipe_screen *screen,
softpipe->pipe.bind_gs_state = softpipe_bind_gs_state;
softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
+ softpipe->pipe.create_vertex_elements_state = softpipe_create_vertex_elements_state;
+ softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state;
+ softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state;
+
softpipe->pipe.set_blend_color = softpipe_set_blend_color;
softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref;
softpipe->pipe.set_clip_state = softpipe_set_clip_state;
@@ -257,7 +261,6 @@ softpipe_create_context( struct pipe_screen *screen,
softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers;
- softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements;
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 3d69cfdb114..9a8158e6a22 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -45,6 +45,7 @@ struct softpipe_tile_cache;
struct softpipe_tex_tile_cache;
struct sp_fragment_shader;
struct sp_vertex_shader;
+struct sp_velems_state;
struct softpipe_context {
@@ -59,6 +60,7 @@ struct softpipe_context {
struct sp_fragment_shader *fs;
struct sp_vertex_shader *vs;
struct sp_geometry_shader *gs;
+ struct sp_velems_state *velems;
/** Other rendering state */
struct pipe_blend_color blend_color;
@@ -72,13 +74,11 @@ struct softpipe_context {
struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned num_samplers;
unsigned num_textures;
unsigned num_vertex_samplers;
unsigned num_vertex_textures;
- unsigned num_vertex_elements;
unsigned num_vertex_buffers;
unsigned dirty; /**< Mask of SP_NEW_x flags */
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 4370bbeaee2..6b01c0f4d72 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -100,6 +100,11 @@ struct sp_geometry_shader {
struct draw_geometry_shader *draw_data;
};
+struct sp_velems_state {
+ unsigned count;
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
void *
softpipe_create_blend_state(struct pipe_context *,
@@ -160,8 +165,14 @@ void *softpipe_create_gs_state(struct pipe_context *,
void softpipe_bind_gs_state(struct pipe_context *, void *);
void softpipe_delete_gs_state(struct pipe_context *, void *);
+void *softpipe_create_vertex_elements_state(struct pipe_context *,
+ unsigned count,
+ const struct pipe_vertex_element *);
+void softpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void softpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
void softpipe_set_polygon_stipple( struct pipe_context *,
- const struct pipe_poly_stipple * );
+ const struct pipe_poly_stipple * );
void softpipe_set_scissor_state( struct pipe_context *,
const struct pipe_scissor_state * );
@@ -178,10 +189,6 @@ softpipe_set_vertex_sampler_textures(struct pipe_context *,
void softpipe_set_viewport_state( struct pipe_context *,
const struct pipe_viewport_state * );
-void softpipe_set_vertex_elements(struct pipe_context *,
- unsigned count,
- const struct pipe_vertex_element *);
-
void softpipe_set_vertex_buffers(struct pipe_context *,
unsigned count,
const struct pipe_vertex_buffer *);
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index b491d92ed15..a151758ddca 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -32,27 +32,44 @@
#include "sp_context.h"
#include "sp_state.h"
+#include "util/u_memory.h"
#include "draw/draw_context.h"
+void *
+softpipe_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
+{
+ struct sp_velems_state *velems;
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct sp_velems_state *) MALLOC(sizeof(struct sp_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
+
void
-softpipe_set_vertex_elements(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *attribs)
+softpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ struct sp_velems_state *sp_velems = (struct sp_velems_state *) velems;
- assert(count <= PIPE_MAX_ATTRIBS);
-
- memcpy(softpipe->vertex_element, attribs,
- count * sizeof(struct pipe_vertex_element));
- softpipe->num_vertex_elements = count;
+ softpipe->velems = sp_velems;
softpipe->dirty |= SP_NEW_VERTEX;
- draw_set_vertex_elements(softpipe->draw, count, attribs);
+ draw_set_vertex_elements(softpipe->draw, sp_velems->count, sp_velems->velem);
}
+void
+softpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
+}
void
softpipe_set_vertex_buffers(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 03302e2a6ec..791d30edc0e 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -169,6 +169,11 @@ struct svga_sampler_state {
unsigned view_max_lod;
};
+struct svga_velems_state {
+ unsigned count;
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
/* Use to calculate differences between state emitted to hardware and
* current driver-calculated state.
*/
@@ -178,13 +183,13 @@ struct svga_state
const struct svga_depth_stencil_state *depth;
const struct svga_rasterizer_state *rast;
const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct svga_velems_state *velems;
struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
struct svga_fragment_shader *fs;
struct svga_vertex_shader *vs;
struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
struct pipe_buffer *cb[PIPE_SHADER_TYPES];
struct pipe_framebuffer_state framebuffer;
@@ -204,7 +209,6 @@ struct svga_state
unsigned num_samplers;
unsigned num_textures;
- unsigned num_vertex_elements;
unsigned num_vertex_buffers;
unsigned reduced_prim;
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index 836b8441da2..1715a47fc62 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -26,6 +26,7 @@
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
+#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
#include "svga_screen.h"
@@ -64,20 +65,37 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe,
svga->dirty |= SVGA_NEW_VBUFFER;
}
-static void svga_set_vertex_elements(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *elements)
+
+static void *
+svga_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
{
- struct svga_context *svga = svga_context(pipe);
- unsigned i;
+ struct svga_velems_state *velems;
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
- for (i = 0; i < count; i++)
- svga->curr.ve[i] = elements[i];
+static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
- svga->curr.num_vertex_elements = count;
+ svga->curr.velems = svga_velems;
svga->dirty |= SVGA_NEW_VELEMENT;
}
+static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
+{
+ FREE(velems);
+}
void svga_cleanup_vertex_state( struct svga_context *svga )
{
@@ -91,7 +109,9 @@ void svga_cleanup_vertex_state( struct svga_context *svga )
void svga_init_vertex_functions( struct svga_context *svga )
{
svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
- svga->pipe.set_vertex_elements = svga_set_vertex_elements;
+ svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state;
+ svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
+ svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
}
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index d774e3e504d..10d473584d1 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -76,8 +76,8 @@ static int update_need_swvfetch( struct svga_context *svga,
unsigned i;
boolean need_swvfetch = FALSE;
- for (i = 0; i < svga->curr.num_vertex_elements; i++) {
- svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format);
+ for (i = 0; i < svga->curr.velems->count; i++) {
+ svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format);
if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
need_swvfetch = TRUE;
break;
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index 107cc403b4d..b7195d246bc 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -191,15 +191,24 @@ static int emit_rss( struct svga_context *svga,
EMIT_RS( svga, svga->curr.stencil_ref.ref_value[0], STENCILREF, fail );
}
- if (dirty & SVGA_NEW_RAST)
+ if (dirty & (SVGA_NEW_RAST | SVGA_NEW_NEED_PIPELINE))
{
const struct svga_rasterizer_state *curr = svga->curr.rast;
+ unsigned cullmode = curr->cullmode;
/* Shademode: still need to rearrange index list to move
* flat-shading PV first vertex.
*/
EMIT_RS( svga, curr->shademode, SHADEMODE, fail );
- EMIT_RS( svga, curr->cullmode, CULLMODE, fail );
+
+ /* Don't do culling while the software pipeline is active. It
+ * does it for us, and additionally introduces potentially
+ * back-facing triangles.
+ */
+ if (svga->state.sw.need_pipeline)
+ cullmode = SVGA3D_FACE_NONE;
+
+ EMIT_RS( svga, cullmode, CULLMODE, fail );
EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index ded903170b5..f531e223048 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -95,17 +95,17 @@ upload_user_buffers( struct svga_context *svga )
static int emit_hw_vs_vdecl( struct svga_context *svga,
unsigned dirty )
{
- const struct pipe_vertex_element *ve = svga->curr.ve;
+ const struct pipe_vertex_element *ve = svga->curr.velems->velem;
SVGA3dVertexDecl decl;
unsigned i;
- assert(svga->curr.num_vertex_elements >=
+ assert(svga->curr.velems->count >=
svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
svga_hwtnl_reset_vdecl( svga->hwtnl,
- svga->curr.num_vertex_elements );
+ svga->curr.velems->count );
- for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+ for (i = 0; i < svga->curr.velems->count; i++) {
const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
unsigned usage, index;
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index d7999fe53d2..781f7bf5339 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -186,8 +186,8 @@ static int update_zero_stride( struct svga_context *svga,
svga->curr.zero_stride_vertex_elements = 0;
svga->curr.num_zero_stride_vertex_elements = 0;
- for (i = 0; i < svga->curr.num_vertex_elements; i++) {
- const struct pipe_vertex_element *vel = &svga->curr.ve[i];
+ for (i = 0; i < svga->curr.velems->count; i++) {
+ const struct pipe_vertex_element *vel = &svga->curr.velems->velem[i];
const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[
vel->vertex_buffer_index];
if (vbuffer->stride == 0) {
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 35f36a828fd..246d34e649e 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -99,8 +99,8 @@ static int update_swtnl_draw( struct svga_context *svga,
if (dirty & SVGA_NEW_VELEMENT)
draw_set_vertex_elements(svga->swtnl.draw,
- svga->curr.num_vertex_elements,
- svga->curr.ve );
+ svga->curr.velems->count,
+ svga->curr.velems->velem );
if (dirty & SVGA_NEW_CLIP)
draw_set_clip_state(svga->swtnl.draw,
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index df40fbade6c..133521f45e2 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -773,6 +773,70 @@ trace_context_delete_vs_state(struct pipe_context *_pipe,
}
+static INLINE void *
+trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_vertex_elements_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_elements);
+
+ trace_dump_arg_begin("elements");
+ trace_dump_struct_array(vertex_element, elements, num_elements);
+ trace_dump_arg_end();
+
+ result = pipe->create_vertex_elements_state(pipe, num_elements, elements);
+
+ trace_dump_ret(ptr, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
+static INLINE void
+trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_vertex_elements_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->bind_vertex_elements_state(pipe, state);
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ /* Dump the call under the same name as the pipe_context method it forwards to. */
+ trace_dump_call_begin("pipe_context", "delete_vertex_elements_state");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+
+ pipe->delete_vertex_elements_state(pipe, state);
+
+ trace_dump_call_end();
+}
+
+
static INLINE void
trace_context_set_blend_color(struct pipe_context *_pipe,
const struct pipe_blend_color *state)
@@ -1048,29 +1112,6 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
static INLINE void
-trace_context_set_vertex_elements(struct pipe_context *_pipe,
- unsigned num_elements,
- const struct pipe_vertex_element *elements)
-{
- struct trace_context *tr_ctx = trace_context(_pipe);
- struct pipe_context *pipe = tr_ctx->pipe;
-
- trace_dump_call_begin("pipe_context", "set_vertex_elements");
-
- trace_dump_arg(ptr, pipe);
- trace_dump_arg(uint, num_elements);
-
- trace_dump_arg_begin("elements");
- trace_dump_struct_array(vertex_element, elements, num_elements);
- trace_dump_arg_end();
-
- pipe->set_vertex_elements(pipe, num_elements, elements);
-
- trace_dump_call_end();
-}
-
-
-static INLINE void
trace_context_surface_copy(struct pipe_context *_pipe,
struct pipe_surface *dest,
unsigned destx, unsigned desty,
@@ -1303,6 +1344,9 @@ trace_context_create(struct trace_screen *tr_scr,
tr_ctx->base.create_vs_state = trace_context_create_vs_state;
tr_ctx->base.bind_vs_state = trace_context_bind_vs_state;
tr_ctx->base.delete_vs_state = trace_context_delete_vs_state;
+ tr_ctx->base.create_vertex_elements_state = trace_context_create_vertex_elements_state;
+ tr_ctx->base.bind_vertex_elements_state = trace_context_bind_vertex_elements_state;
+ tr_ctx->base.delete_vertex_elements_state = trace_context_delete_vertex_elements_state;
tr_ctx->base.set_blend_color = trace_context_set_blend_color;
tr_ctx->base.set_stencil_ref = trace_context_set_stencil_ref;
tr_ctx->base.set_clip_state = trace_context_set_clip_state;
@@ -1314,7 +1358,6 @@ trace_context_create(struct trace_screen *tr_scr,
tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures;
tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures;
tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
- tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
if (pipe->surface_copy)
tr_ctx->base.surface_copy = trace_context_surface_copy;
if (pipe->surface_fill)
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index f97d963dba6..f82dd01c697 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -479,7 +479,6 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state)
trace_dump_member(uint, state, src_offset);
trace_dump_member(uint, state, vertex_buffer_index);
- trace_dump_member(uint, state, nr_components);
trace_dump_member(format, state, src_format);
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index b93b38310ac..e2766d15cd1 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -31,13 +31,8 @@
#include "p_config.h"
-#ifndef XFree86Server
#include <stdlib.h>
#include <string.h>
-#else
-#include "xf86_ansic.h"
-#include "xf86_libc.h"
-#endif
#include <stddef.h>
#include <stdarg.h>
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f82b77903e9..376b01aa696 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -177,6 +177,12 @@ struct pipe_context {
void (*bind_gs_state)(struct pipe_context *, void *);
void (*delete_gs_state)(struct pipe_context *, void *);
+ void * (*create_vertex_elements_state)(struct pipe_context *,
+ unsigned num_elements,
+ const struct pipe_vertex_element *);
+ void (*bind_vertex_elements_state)(struct pipe_context *, void *);
+ void (*delete_vertex_elements_state)(struct pipe_context *, void *);
+
/*@}*/
/**
@@ -220,9 +226,6 @@ struct pipe_context {
unsigned num_buffers,
const struct pipe_vertex_buffer * );
- void (*set_vertex_elements)( struct pipe_context *,
- unsigned num_elements,
- const struct pipe_vertex_element * );
/*@}*/
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 72635d1031b..3a97d888ce6 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -373,7 +373,6 @@ struct pipe_vertex_element
* this attribute live in?
*/
unsigned vertex_buffer_index:8;
- unsigned nr_components:8;
enum pipe_format src_format;
};
diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 3f36ccb6217..5c44462e80f 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -51,7 +51,7 @@ struct st_context {
void set_blend( const struct pipe_blend_state *state ) {
cso_set_blend($self->cso, state);
}
-
+
void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) {
cso_single_sampler($self->cso, index, state);
cso_single_sampler_done($self->cso);
@@ -222,9 +222,9 @@ struct st_context {
void set_vertex_elements(unsigned num)
{
$self->num_vertex_elements = num;
- $self->pipe->set_vertex_elements($self->pipe,
- $self->num_vertex_elements,
- $self->vertex_elements);
+ cso_set_vertex_elements($self->cso,
+ $self->num_vertex_elements,
+ $self->vertex_elements);
}
/*
diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c
index 9c123a4cf95..7eb5ea1f078 100644
--- a/src/gallium/state_trackers/vega/api_masks.c
+++ b/src/gallium/state_trackers/vega/api_masks.c
@@ -86,6 +86,8 @@ draw_clear_quad(struct vg_context *st,
/* draw */
if (buf) {
+ cso_set_vertex_elements(st->cso_context, 2, st->velems);
+
util_draw_vertex_buffer(pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c
index c06dbf52069..eef2c1eb876 100644
--- a/src/gallium/state_trackers/vega/polygon.c
+++ b/src/gallium/state_trackers/vega/polygon.c
@@ -292,12 +292,12 @@ static void draw_polygon(struct vg_context *ctx,
pipe->set_vertex_buffers(pipe, 1, &vbuffer);
/* tell pipe about the vertex attributes */
+ memset(&velement, 0, sizeof(velement));
velement.src_offset = 0;
velement.instance_divisor = 0;
velement.vertex_buffer_index = 0;
velement.src_format = PIPE_FORMAT_R32G32_FLOAT;
- velement.nr_components = COMPONENTS;
- pipe->set_vertex_elements(pipe, 1, &velement);
+ cso_set_vertex_elements(ctx->cso_context, 1, &velement);
/* draw */
pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN,
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 05620efa9c0..47e8b470a11 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -210,6 +210,7 @@ void renderer_draw_quad(struct renderer *r,
buf = setup_vertex_data(r, x1, y1, x2, y2, depth);
if (buf) {
+ cso_set_vertex_elements(r->cso, 2, r->owner->velems);
util_draw_vertex_buffer(r->pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
@@ -248,6 +249,7 @@ void renderer_draw_texture(struct renderer *r,
s0, t0, s1, t1, 0.0f);
if (buf) {
+ cso_set_vertex_elements(r->cso, 2, r->owner->velems);
util_draw_vertex_buffer(pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
@@ -370,6 +372,7 @@ void renderer_copy_texture(struct renderer *ctx,
0.0f);
if (buf) {
+ cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems);
util_draw_vertex_buffer(ctx->pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
@@ -535,6 +538,7 @@ void renderer_copy_surface(struct renderer *ctx,
(float) dstX1, (float) dstY1, z);
if (buf) {
+ cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems);
util_draw_vertex_buffer(ctx->pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
@@ -587,6 +591,7 @@ void renderer_texture_quad(struct renderer *r,
s0, t0, s1, t1, 0.0f);
if (buf) {
+ cso_set_vertex_elements(r->cso, 2, r->owner->velems);
util_draw_vertex_buffer(pipe, buf, 0,
PIPE_PRIM_TRIANGLE_FAN,
4, /* verts */
diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c
index 426bf9bc62b..170391ec031 100644
--- a/src/gallium/state_trackers/vega/vg_context.c
+++ b/src/gallium/state_trackers/vega/vg_context.c
@@ -72,6 +72,7 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
struct vg_context *share)
{
struct vg_context *ctx;
+ unsigned i;
ctx = CALLOC_STRUCT(vg_context);
@@ -103,6 +104,13 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
ctx->blend_sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
ctx->blend_sampler.normalized_coords = 0;
+ for (i = 0; i < 2; i++) {
+ ctx->velems[i].src_offset = i * 4 * sizeof(float);
+ ctx->velems[i].instance_divisor = 0;
+ ctx->velems[i].vertex_buffer_index = 0;
+ ctx->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+
vg_set_error(ctx, VG_NO_ERROR);
ctx->owned_objects[VG_OBJECT_PAINT] = cso_hash_create();
diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h
index bc88c8d139d..804e9e76d77 100644
--- a/src/gallium/state_trackers/vega/vg_context.h
+++ b/src/gallium/state_trackers/vega/vg_context.h
@@ -146,6 +146,7 @@ struct vg_context
struct vg_shader *clear_vs;
struct vg_shader *texture_vs;
struct pipe_buffer *vs_const_buffer;
+ struct pipe_vertex_element velems[2];
};
struct vg_object {
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index 83b0d31e38d..1eb926360b9 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -68,6 +68,8 @@ renderer_draw(struct xorg_renderer *r)
if (buf) {
+ cso_set_vertex_elements(r->cso, r->attrs_per_vertex, r->velems);
+
util_draw_vertex_buffer(pipe, buf, 0,
PIPE_PRIM_QUADS,
num_verts, /* verts */
@@ -92,6 +94,7 @@ renderer_init_state(struct xorg_renderer *r)
{
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_rasterizer_state raster;
+ unsigned i;
/* set common initial clip state */
memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state));
@@ -103,6 +106,14 @@ renderer_init_state(struct xorg_renderer *r)
raster.gl_rasterization_rules = 1;
cso_set_rasterizer(r->cso, &raster);
+ /* vertex elements state */
+ memset(&r->velems[0], 0, sizeof(r->velems[0]) * 3);
+ for (i = 0; i < 3; i++) {
+ r->velems[i].src_offset = i * 4 * sizeof(float);
+ r->velems[i].instance_divisor = 0;
+ r->velems[i].vertex_buffer_index = 0;
+ r->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
}
@@ -600,6 +611,8 @@ void renderer_draw_yuv(struct xorg_renderer *r,
if (buf) {
const int num_attribs = 2; /*pos + tex coord*/
+ cso_set_vertex_elements(r->cso, num_attribs, r->velems);
+
util_draw_vertex_buffer(pipe, buf, 0,
PIPE_PRIM_QUADS,
4, /* verts */
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.h b/src/gallium/state_trackers/xorg/xorg_renderer.h
index af6aa0567d6..3d006287199 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.h
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.h
@@ -28,6 +28,7 @@ struct xorg_renderer {
float buffer[BUF_SIZE];
int buffer_size;
+ struct pipe_vertex_element velems[3];
/* number of attributes per vertex for the current
* draw operation */
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 0a25dccde56..e80ec5ee880 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -249,6 +249,7 @@ if env['platform'] != 'winddk':
glapi_sources = [
'glapi/glapi.c',
'glapi/glapi_dispatch.c',
+ 'glapi/glapi_entrypoint.c',
'glapi/glapi_getproc.c',
'glapi/glapi_nop.c',
'glapi/glthread.c',
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index a8f6b993ac3..54699cf8d34 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -74,9 +74,9 @@ struct {
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index c78f7b38aee..1fd957b3ad6 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
- struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
- struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
- dri_bo_unreference(brw_fprog->const_buffer);
+ struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
+
+ dri_bo_unreference(brw_fp->const_buffer);
+ }
+
+ if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+ struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
+ struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
+
+ dri_bo_unreference(brw_vp->const_buffer);
}
_mesa_delete_program( ctx, prog );
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a7c4b589727..a48804a660f 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* patch all the BREAK/CONT instructions from last BEGINLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 562608e2ecd..ea3c2405af9 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -614,112 +614,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written. Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
- void (*func)(struct brw_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
- int i, j;
- int mark = mark_tmps(c);
-
- for (j = 0; j < 4; j++) {
- tmp_arg0[j] = arg0[j];
- tmp_arg1[j] = arg1[j];
- tmp_arg2[j] = arg2[j];
- }
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- for (j = 0; j < 4; j++) {
- if (arg0[j].file == dst[i].file &&
- dst[i].nr == arg0[j].nr) {
- tmp_arg0[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg0[j], arg0[j]);
- }
- if (arg1[j].file == dst[i].file &&
- dst[i].nr == arg1[j].nr) {
- tmp_arg1[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg1[j], arg1[j]);
- }
- if (arg2[j].file == dst[i].file &&
- dst[i].nr == arg2[j].nr) {
- tmp_arg2[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg2[j], arg2[j]);
- }
- }
- }
- }
-
- func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
- release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written. Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
- void (*func)(struct brw_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp_arg0[4], tmp_arg1[4];
- int i, j;
- int mark = mark_tmps(c);
-
- for (j = 0; j < 4; j++) {
- tmp_arg0[j] = arg0[j];
- tmp_arg1[j] = arg1[j];
- }
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- for (j = 0; j < 4; j++) {
- if (arg0[j].file == dst[i].file &&
- dst[i].nr == arg0[j].nr) {
- tmp_arg0[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg0[j], arg0[j]);
- }
- if (arg1[j].file == dst[i].file &&
- dst[i].nr == arg1[j].nr) {
- tmp_arg1[j] = alloc_tmp(c);
- brw_MOV(p, tmp_arg1[j], arg1[j]);
- }
- }
- }
- }
-
- func(p, dst, mask, tmp_arg0, tmp_arg1);
-
- release_tmps(c, mark);
-}
-
static void emit_arl(struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
@@ -1813,14 +1707,29 @@ static void
get_argument_regs(struct brw_wm_compile *c,
const struct prog_instruction *inst,
int index,
+ struct brw_reg *dst,
struct brw_reg *regs,
int mask)
{
- int i;
+ struct brw_compile *p = &c->func;
+ int i, j;
for (i = 0; i < 4; i++) {
- if (mask & (1 << i))
+ if (mask & (1 << i)) {
regs[i] = get_src_reg(c, inst, index, i);
+
+ /* Unalias destination registers from our sources. */
+ if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+ for (j = 0; j < 4; j++) {
+ if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
+ struct brw_reg tmp = alloc_tmp(c);
+ brw_MOV(p, tmp, regs[i]);
+ regs[i] = tmp;
+ break;
+ }
+ }
+ }
+ }
}
}
@@ -1845,6 +1754,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
int dst_flags;
struct brw_reg args[3][4], dst[4];
int j;
+ int mark = mark_tmps( c );
c->cur_inst = i;
@@ -1866,7 +1776,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
}
}
for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
- get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+ get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
dst_flags = inst->DstReg.WriteMask;
if (inst->SaturateMode == SATURATE_ZERO_ONE)
@@ -1920,8 +1830,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
- unalias3(c, emit_lrp,
- dst, dst_flags, args[0], args[1], args[2]);
+ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_TRUNC:
emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
@@ -1961,10 +1870,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
case OPCODE_MIN:
- unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+ emit_min(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_MAX:
- unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+ emit_max(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DDX:
case OPCODE_DDY:
@@ -2103,11 +2012,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
/* patch all the BREAK/CONT instructions from last BGNLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
@@ -2119,6 +2030,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
inst->Opcode);
}
+ /* Release temporaries containing any unaliased source regs. */
+ release_tmps( c, mark );
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 3f871005705..9ea81fd5059 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -21,6 +21,7 @@ RADEON_COMMON_SOURCES = \
radeon_fbo.c \
radeon_lock.c \
radeon_mipmap_tree.c \
+ radeon_pixel_read.c \
radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index dad2580e08b..4f1a56658cc 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -266,6 +266,7 @@ static void r200_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.emit_query_finish = r200_emit_query_finish;
radeon->vtbl.check_blit = r200_check_blit;
radeon->vtbl.blit = r200_blit;
+ radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
}
diff --git a/src/mesa/drivers/dri/r200/radeon_pixel_read.c b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
new file mode 120000
index 00000000000..3b03803126f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 4257a32b89f..2245998c952 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -31,8 +31,9 @@ RADEON_COMMON_SOURCES = \
radeon_fbo.c \
radeon_lock.c \
radeon_mipmap_tree.c \
- radeon_span.c \
+ radeon_pixel_read.c \
radeon_queryobj.c \
+ radeon_span.c \
radeon_texture.c \
radeon_tex_copy.c \
radeon_tex_getimage.c \
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 6cfa5686f4a..e2dbb1dbf40 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -332,36 +332,37 @@ void r300_emit_cb_setup(struct r300_context *r300,
assert(offset % 32 == 0);
switch (format) {
- case MESA_FORMAT_RGB565:
- assert(_mesa_little_endian());
- cbpitch |= R300_COLOR_FORMAT_RGB565;
+ case MESA_FORMAT_SL8:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ cbpitch |= R300_COLOR_FORMAT_I8;
break;
+ case MESA_FORMAT_RGB565:
case MESA_FORMAT_RGB565_REV:
- assert(!_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
- assert(_mesa_little_endian());
- cbpitch |= R300_COLOR_FORMAT_ARGB4444;
- break;
case MESA_FORMAT_ARGB4444_REV:
- assert(!_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_ARGB4444;
break;
+ case MESA_FORMAT_RGBA5551:
case MESA_FORMAT_ARGB1555:
- assert(_mesa_little_endian());
- cbpitch |= R300_COLOR_FORMAT_ARGB1555;
- break;
case MESA_FORMAT_ARGB1555_REV:
- assert(!_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_ARGB1555;
break;
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
+ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+ break;
default:
- if (cpp == 4) {
- cbpitch |= R300_COLOR_FORMAT_ARGB8888;
- } else {
- _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");;
- }
+ _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");
break;
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index ff35cd52753..364e0ba6b61 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -321,6 +321,12 @@ static void r300_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.check_blit = r300_check_blit;
radeon->vtbl.blit = r300_blit;
+
+ if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ radeon->vtbl.is_format_renderable = r500IsFormatRenderable;
+ } else {
+ radeon->vtbl.is_format_renderable = r300IsFormatRenderable;
+ }
}
static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 5979dedac4f..87489412419 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
+#include "drivers/common/meta.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "shader/prog_parameter.h"
@@ -2237,6 +2238,68 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
}
}
+/* Build a us_out_fmt value from an output format and four channel selects. */
+#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \
+	(R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \
+	 R500_C2_SEL_##c2 | R500_C3_SEL_##c3)
+/**
+ * Select the us_out_fmt hardware value that corresponds to the Mesa format
+ * of the current colour renderbuffer and store it in the context's
+ * us_out_fmt state atom.
+ *
+ * Does nothing when there is no colour renderbuffer.  For a format that is
+ * not handled below, debug builds assert; builds with NDEBUG leave the
+ * previously stored value untouched.
+ */
+static void r300SetupUsOutputFormat(GLcontext *ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	uint32_t hw_format;
+	struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+
+	if (!rrb) {
+		return;
+	}
+
+	switch (rrb->base.Format)
+	{
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_RGBA8888:
+		hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R);
+		break;
+	case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_RGBA8888_REV:
+		hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A);
+		break;
+	case MESA_FORMAT_RGB565:
+	case MESA_FORMAT_ARGB4444:
+	case MESA_FORMAT_ARGB1555:
+	case MESA_FORMAT_XRGB8888:
+	case MESA_FORMAT_ARGB8888:
+		hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A);
+		break;
+	case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_XRGB8888_REV:
+	case MESA_FORMAT_ARGB8888_REV:
+		hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B);
+		break;
+	case MESA_FORMAT_SRGBA8:
+		hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R);
+		break;
+	case MESA_FORMAT_SARGB8:
+		hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A);
+		break;
+	case MESA_FORMAT_SL8:
+		hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A);
+		break;
+	case MESA_FORMAT_A8:
+		hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A);
+		break;
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+		hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A);
+		break;
+	default:
+		/* Fail loudly in debug builds.  With NDEBUG the assert
+		 * disappears, so bail out here rather than fall through and
+		 * store an uninitialized hw_format into the state atom.
+		 */
+		assert(!"Unsupported format");
+		return;
+	}
+
+	R300_STATECHANGE(rmesa, us_out_fmt);
+	rmesa->hw.us_out_fmt.cmd[1] = hw_format;
+}
+#undef EASY_US_OUT_FMT
+
/**
* Called by Mesa after an internal state update.
*/
@@ -2266,6 +2329,10 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
}
+ if (new_state & _NEW_BUFFERS) {
+ r300SetupUsOutputFormat(ctx);
+ }
+
r300->radeon.NewGLState |= new_state;
}
@@ -2326,8 +2393,12 @@ void r300InitStateFuncs(struct dd_function_table *functions)
functions->ClipPlane = r300ClipPlane;
functions->Scissor = radeonScissor;
- functions->DrawBuffer = radeonDrawBuffer;
- functions->ReadBuffer = radeonReadBuffer;
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
+
+ functions->CopyPixels = _mesa_meta_CopyPixels;
+ functions->DrawPixels = _mesa_meta_DrawPixels;
+ functions->ReadPixels = radeonReadPixels;
}
void r300InitShaderFunctions(r300ContextPtr r300)
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 8dd85073954..baef206bc26 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -308,6 +308,45 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
return &t->base;
}
+/**
+ * Tell whether a Mesa format is in this driver's list of renderable
+ * formats.
+ *
+ * \param mesa_format  format to look up
+ * \return 1 if the format is listed below, 0 otherwise
+ */
+unsigned r300IsFormatRenderable(gl_format mesa_format)
+{
+	unsigned renderable;
+
+	switch (mesa_format) {
+	/* 565 / 5551 / 1555 / 4444 variants */
+	case MESA_FORMAT_RGB565:
+	case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_ARGB1555:
+	case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_ARGB4444:
+	case MESA_FORMAT_ARGB4444_REV:
+	/* 8888 variants */
+	case MESA_FORMAT_RGBA8888:
+	case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_XRGB8888:
+	case MESA_FORMAT_XRGB8888_REV:
+	case MESA_FORMAT_ARGB8888:
+	case MESA_FORMAT_ARGB8888_REV:
+	/* sRGB variants */
+	case MESA_FORMAT_SRGBA8:
+	case MESA_FORMAT_SARGB8:
+	case MESA_FORMAT_SL8:
+	/* A8 / L8 / I8 */
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+	/* Z16 */
+	case MESA_FORMAT_Z16:
+		renderable = 1;
+		break;
+	default:
+		renderable = 0;
+		break;
+	}
+
+	return renderable;
+}
+
+/**
+ * Renderable-format query that accepts MESA_FORMAT_S8_Z24 in addition to
+ * everything r300IsFormatRenderable() accepts.
+ *
+ * \param mesa_format  format to look up
+ * \return 1 for MESA_FORMAT_S8_Z24, otherwise the result of
+ *         r300IsFormatRenderable()
+ */
+unsigned r500IsFormatRenderable(gl_format mesa_format)
+{
+	/* Anything but the one extra format is answered by the r300 list. */
+	if (mesa_format != MESA_FORMAT_S8_Z24)
+		return r300IsFormatRenderable(mesa_format);
+
+	return 1;
+}
+
void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
{
/* Note: we only plug in the functions we implement in the driver
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index 9694e703b83..aca44cd7669 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -53,4 +53,7 @@ extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_tab
int32_t r300TranslateTexFormat(gl_format mesaFormat);
+unsigned r300IsFormatRenderable(gl_format mesaFormat);
+unsigned r500IsFormatRenderable(gl_format mesaFormat);
+
#endif /* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
new file mode 120000
index 00000000000..3b03803126f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index f76859d11e2..17915621ee4 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -31,9 +31,10 @@ RADEON_COMMON_SOURCES = \
radeon_fbo.c \
radeon_lock.c \
radeon_mipmap_tree.c \
+ radeon_pixel_read.c \
+ radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
- radeon_queryobj.c \
radeon_tex_copy.c \
radeon_tex_getimage.c \
radeon_tile.c
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 134e97e7c33..76d5027649e 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -239,6 +239,7 @@ static void r600_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.emit_query_finish = r600_emit_query_finish;
radeon->vtbl.check_blit = r600_check_blit;
radeon->vtbl.blit = r600_blit;
+ radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
}
static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
diff --git a/src/mesa/drivers/dri/r600/radeon_pixel_read.c b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
new file mode 120000
index 00000000000..3b03803126f
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 6904ebbee3b..19df62742ec 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -22,6 +22,7 @@ RADEON_COMMON_SOURCES = \
radeon_fbo.c \
radeon_lock.c \
radeon_mipmap_tree.c \
+ radeon_pixel_read.c \
radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
index cd01c9984e3..35b3f08fff9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
@@ -44,6 +44,12 @@ radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
struct radeon_renderbuffer *
radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv);
+void
+radeonReadPixels(GLcontext * ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *pack, GLvoid * pixels);
+
void radeon_check_front_buffer_rendering(GLcontext *ctx);
static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
{
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index d1a24e265f2..5156c5d0d0a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -539,6 +539,7 @@ struct radeon_context {
unsigned reg_width,
unsigned reg_height,
unsigned flip_y);
+ unsigned (*is_format_renderable)(gl_format mesa_format);
} vtbl;
};
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 878a453bd53..56aba16e9e0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -200,6 +200,7 @@ static void r100_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.emit_query_finish = r100_emit_query_finish;
radeon->vtbl.check_blit = r100_check_blit;
radeon->vtbl.blit = r100_blit;
+ radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
}
/* Create the device specific context.
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index 46664a17556..63986058356 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -409,82 +409,51 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx,
radeon_draw_buffer(ctx, fb);
}
-
-/* TODO: According to EXT_fbo spec internal format of texture image
- * once set during glTexImage call, should be preserved when
- * attaching image to renderbuffer. When HW doesn't support
- * rendering to format of attached image, set framebuffer
- * completeness accordingly in radeon_validate_framebuffer (issue #79).
- */
static GLboolean
radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb,
struct gl_texture_image *texImage)
{
- int retry = 0;
- gl_format texFormat;
-
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
- "%s(%p, rrb %p, texImage %p) \n",
- __func__, ctx, rrb, texImage);
-
-restart:
- if (texImage->TexFormat == _dri_texformat_argb8888) {
- rrb->base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to RGBA8 texture OK\n");
+ "%s(%p, rrb %p, texImage %p, texFormat %s) \n",
+ __func__, ctx, rrb, texImage, _mesa_get_format_name(texImage->TexFormat));
+
+ switch (texImage->TexFormat) {
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_RGB565_REV:
+ case MESA_FORMAT_RGBA5551:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_ARGB1555_REV:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB4444_REV:
+ rrb->base.DataType = GL_UNSIGNED_BYTE;
+ break;
+ case MESA_FORMAT_Z16:
+ rrb->base.DataType = GL_UNSIGNED_SHORT;
+ break;
+ case MESA_FORMAT_X8_Z24:
+ rrb->base.DataType = GL_UNSIGNED_INT;
+ break;
+ case MESA_FORMAT_S8_Z24:
+ rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+ break;
}
- else if (texImage->TexFormat == _dri_texformat_rgb565) {
- rrb->base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to RGB5 texture OK\n");
- }
- else if (texImage->TexFormat == _dri_texformat_argb1555) {
- rrb->base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to ARGB1555 texture OK\n");
- }
- else if (texImage->TexFormat == _dri_texformat_argb4444) {
- rrb->base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to ARGB4444 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_Z16) {
- rrb->base.DataType = GL_UNSIGNED_SHORT;
- DBG("Render to DEPTH16 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) {
- rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
- DBG("Render to DEPTH_STENCIL texture OK\n");
- }
- else {
- /* try redoing the FBO */
- if (retry == 1) {
- DBG("Render to texture BAD FORMAT %d\n",
- texImage->TexFormat);
- return GL_FALSE;
- }
- /* XXX why is the tex format being set here?
- * I think this can be removed.
- */
- texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
- _mesa_get_format_datatype(texImage->TexFormat),
- 1);
-
- retry++;
- goto restart;
- }
-
- texFormat = texImage->TexFormat;
-
- rrb->base.Format = texFormat;
-
- rrb->cpp = _mesa_get_format_bytes(texFormat);
+
+ rrb->cpp = _mesa_get_format_bytes(texImage->TexFormat);
rrb->pitch = texImage->Width * rrb->cpp;
+ rrb->base.Format = texImage->TexFormat;
rrb->base.InternalFormat = texImage->InternalFormat;
- rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
-
+ rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
rrb->base.Width = texImage->Width;
rrb->base.Height = texImage->Height;
-
rrb->base.Delete = radeon_delete_renderbuffer;
rrb->base.AllocStorage = radeon_nop_alloc_storage;
-
+
return GL_TRUE;
}
@@ -607,6 +576,35 @@ radeon_finish_render_texture(GLcontext * ctx,
+/**
+ * Check every texture attachment of \p fb against the driver's
+ * is_format_renderable hook.  The framebuffer status is set to
+ * GL_FRAMEBUFFER_UNSUPPORTED as soon as the hook rejects a format;
+ * attachments that are not textures are skipped.
+ */
static void
radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ gl_format mesa_format;
+ int i;
+
+ /* i == -2 selects the depth attachment, i == -1 the stencil attachment,
+  * and i >= 0 selects colour attachment i.
+  */
+ for (i = -2; i < (GLint) ctx->Const.MaxColorAttachments; i++) {
+ struct gl_renderbuffer_attachment *att;
+ if (i == -2) {
+ att = &fb->Attachment[BUFFER_DEPTH];
+ } else if (i == -1) {
+ att = &fb->Attachment[BUFFER_STENCIL];
+ } else {
+ att = &fb->Attachment[BUFFER_COLOR0 + i];
+ }
+
+ if (att->Type == GL_TEXTURE) {
+ /* Format of the specific face/level image that is attached. */
+ mesa_format = att->Texture->Image[att->CubeMapFace][att->TextureLevel]->TexFormat;
+ } else {
+ /* All renderbuffer formats are renderable, but not sampleable */
+ continue;
+ }
+
+ if (!radeon->vtbl.is_format_renderable(mesa_format)){
+ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
+ radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+ "%s: HW doesn't support format %s as output format of attachment %d\n",
+ __FUNCTION__, _mesa_get_format_name(mesa_format), i);
+ /* The first unsupported attachment decides the result. */
+ return;
+ }
+ }
}
void radeon_fbo_init(struct radeon_context *radeon)
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index c6cc417dd6c..78c5f5dd572 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -468,12 +468,9 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt,
radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
- /* TODO: bring back these assertions once the FBOs are fixed */
-#if 0
assert(image->mtlevel == level);
assert(srclvl->size == dstlvl->size);
assert(srclvl->rowstride == dstlvl->rowstride);
-#endif
radeon_bo_map(image->mt->bo, GL_FALSE);
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
new file mode 100644
index 00000000000..27841938e66
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2010 Maciej Cencora <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "stdint.h"
+#include "main/bufferobj.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/state.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common_context.h"
+#include "radeon_debug.h"
+#include "radeon_mipmap_tree.h"
+
+/**
+ * Map a client-side (format, type) pixel description onto a gl_format.
+ *
+ * Only a handful of GL_RGB and GL_RGBA combinations are recognised; every
+ * other pair yields MESA_FORMAT_NONE.
+ *
+ * NOTE(review): GL_UNSIGNED_SHORT_4_4_4_4 and GL_UNSIGNED_SHORT_4_4_4_4_REV
+ * both resolve to MESA_FORMAT_ARGB4444 -- confirm that this is intended.
+ */
+static gl_format gl_format_and_type_to_mesa_format(GLenum format, GLenum type)
+{
+    if (format == GL_RGB) {
+        if (type == GL_UNSIGNED_SHORT_5_6_5)
+            return MESA_FORMAT_RGB565;
+        if (type == GL_UNSIGNED_SHORT_5_6_5_REV)
+            return MESA_FORMAT_RGB565_REV;
+        return MESA_FORMAT_NONE;
+    }
+
+    if (format != GL_RGBA)
+        return MESA_FORMAT_NONE;
+
+    switch (type) {
+    case GL_UNSIGNED_BYTE:
+        return MESA_FORMAT_RGBA8888_REV;
+    case GL_FLOAT:
+        return MESA_FORMAT_RGBA_FLOAT32;
+    case GL_UNSIGNED_SHORT_4_4_4_4:
+    case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+        return MESA_FORMAT_ARGB4444;
+    case GL_UNSIGNED_SHORT_5_5_5_1:
+        return MESA_FORMAT_RGBA5551;
+    case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+        return MESA_FORMAT_ARGB1555_REV;
+    case GL_UNSIGNED_INT_8_8_8_8:
+        return MESA_FORMAT_ARGB8888;
+    case GL_UNSIGNED_INT_8_8_8_8_REV:
+        return MESA_FORMAT_ARGB8888_REV;
+    default:
+        return MESA_FORMAT_NONE;
+    }
+}
+
+/**
+ * Try to satisfy a ReadPixels request with the hardware blitter.
+ *
+ * The requested rectangle of the current colour read buffer is blitted
+ * into a temporary buffer object opened with RADEON_GEM_DOMAIN_GTT, which
+ * is then mapped and copied row by row into \p pixels.
+ *
+ * \return GL_TRUE when the request was handled here (including the case
+ *         where clipping leaves nothing to read), GL_FALSE when the caller
+ *         has to use another path.
+ */
+static GLboolean
+do_blit_readpixels(GLcontext * ctx,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    const struct radeon_renderbuffer *rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+    const gl_format dst_format = gl_format_and_type_to_mesa_format(format, type);
+    unsigned dst_rowstride, dst_imagesize, aligned_rowstride, flip_y;
+    struct radeon_bo *dst_buffer;
+    GLint dst_x = 0, dst_y = 0;
+
+    /* Not worth the setup cost when only a few pixels are requested. */
+    if (width * height < 100) {
+        return GL_FALSE;
+    }
+
+    if (dst_format == MESA_FORMAT_NONE ||
+        !radeon->vtbl.check_blit(dst_format) || !radeon->vtbl.blit) {
+        return GL_FALSE;
+    }
+
+    /* Pixel transfer operations are not applied by this path. */
+    if (ctx->_ImageTransferState) {
+        return GL_FALSE;
+    }
+
+    if (pack->SwapBytes || pack->LsbFirst) {
+        return GL_FALSE;
+    }
+
+    /* Destination row length in pixels; converted to bytes before copying. */
+    if (pack->RowLength > 0) {
+        dst_rowstride = pack->RowLength;
+    } else {
+        dst_rowstride = width;
+    }
+
+    /* NOTE(review): dst_x/dst_y may be adjusted by the clip below but are
+     * not used afterwards -- confirm whether the destination should be
+     * offset when the rectangle is clipped.
+     */
+    if (!_mesa_clip_copytexsubimage(ctx, &dst_x, &dst_y, &x, &y, &width, &height)) {
+        return GL_TRUE;
+    }
+    assert(x >= 0 && y >= 0);
+
+    aligned_rowstride = get_texture_image_row_stride(radeon, dst_format, dst_rowstride, 0);
+    dst_imagesize = get_texture_image_size(dst_format,
+                                           aligned_rowstride,
+                                           height, 1, 0);
+    dst_buffer = radeon_bo_open(radeon->radeonScreen->bom, 0, dst_imagesize, 1024, RADEON_GEM_DOMAIN_GTT, 0);
+    if (dst_buffer == NULL) {
+        /* No staging buffer: let the caller use its fallback path. */
+        return GL_FALSE;
+    }
+
+    /* Disable source Y flipping for FBOs */
+    flip_y = (ctx->ReadBuffer->Name == 0);
+    if (pack->Invert) {
+        y = rrb->base.Height - height - y;
+        flip_y = !flip_y;
+    }
+
+    if (!radeon->vtbl.blit(ctx,
+                           rrb->bo,
+                           rrb->draw_offset,
+                           rrb->base.Format,
+                           rrb->pitch / rrb->cpp,
+                           rrb->base.Width,
+                           rrb->base.Height,
+                           x,
+                           y,
+                           dst_buffer,
+                           0, /* dst_offset */
+                           dst_format,
+                           aligned_rowstride / _mesa_get_format_bytes(dst_format),
+                           width,
+                           height,
+                           0, /* dst_x */
+                           0, /* dst_y */
+                           width,
+                           height,
+                           flip_y))
+    {
+        radeon_bo_unref(dst_buffer);
+        return GL_FALSE;
+    }
+
+    /* Without a CPU mapping dst_buffer->ptr cannot be read. */
+    if (radeon_bo_map(dst_buffer, 0) != 0) {
+        radeon_bo_unref(dst_buffer);
+        return GL_FALSE;
+    }
+
+    /* Rows in the staging buffer use the aligned stride; rows in the
+     * client buffer use the requested row length.
+     */
+    dst_rowstride *= _mesa_get_format_bytes(dst_format);
+    copy_rows(pixels, dst_rowstride, dst_buffer->ptr,
+              aligned_rowstride, height, dst_rowstride);
+    radeon_bo_unmap(dst_buffer);
+    radeon_bo_unref(dst_buffer);
+    return GL_TRUE;
+}
+
+/**
+ * ReadPixels entry point: the blit-based path is tried first and
+ * _swrast_ReadPixels() is used whenever that path declines the request.
+ */
+void
+radeonReadPixels(GLcontext * ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    const GLboolean handled =
+        do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
+
+    if (!handled) {
+        radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
+                     "Falling back to sw for ReadPixels (format %s, type %s)\n",
+                     _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+
+        /* The spans code needs up-to-date buffer state, but
+         * _swrast_ReadPixels only updates Mesa state after the spans code
+         * has been set up, so bring the state up to date here first.
+         */
+        if (ctx->NewState) {
+            _mesa_update_state(ctx);
+        }
+
+        _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
+    }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index 3ccc711253b..2b655fbd953 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -1006,3 +1006,19 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
radeon_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, 0,
format, type, pixels, packing, texObj, texImage, 0);
}
+
+/**
+ * Report whether \p mesa_format is accepted as a render target format.
+ *
+ * \return 1 for the four _dri_texformat_* colour formats checked below and
+ *         for MESA_FORMAT_Z16 / MESA_FORMAT_S8_Z24, 0 for anything else.
+ */
+unsigned radeonIsFormatRenderable(gl_format mesa_format)
+{
+    const int color_ok = mesa_format == _dri_texformat_argb8888 ||
+                         mesa_format == _dri_texformat_rgb565 ||
+                         mesa_format == _dri_texformat_argb1555 ||
+                         mesa_format == _dri_texformat_argb4444;
+    const int depth_ok = mesa_format == MESA_FORMAT_Z16 ||
+                         mesa_format == MESA_FORMAT_S8_Z24;
+
+    return (color_ok || depth_ok) ? 1 : 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index f09dd652142..4ce639ea34e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -135,4 +135,6 @@ void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
GLint x, GLint y,
GLsizei width, GLsizei height);
+unsigned radeonIsFormatRenderable(gl_format mesa_format);
+
#endif
diff --git a/src/mesa/glapi/glapi.c b/src/mesa/glapi/glapi.c
index 13de594aafb..ce85cf6a87b 100644
--- a/src/mesa/glapi/glapi.c
+++ b/src/mesa/glapi/glapi.c
@@ -59,7 +59,7 @@
#endif
#include "glapi/glapi.h"
-#include "glapi/glapitable.h"
+#include "glapi/glapi_priv.h"
extern _glapi_proc __glapi_noop_table[];
@@ -291,45 +291,3 @@ _glapi_get_dispatch(void)
return _glapi_Dispatch;
#endif
}
-
-
-
-
-/*
- * The dispatch table size (number of entries) is the size of the
- * _glapi_table struct plus the number of dynamic entries we can add.
- * The extra slots can be filled in by DRI drivers that register new extension
- * functions.
- */
-#define DISPATCH_TABLE_SIZE (sizeof(struct _glapi_table) / sizeof(void *) + MAX_EXTENSION_FUNCS)
-
-
-/**
- * Return size of dispatch table struct as number of functions (or
- * slots).
- */
-PUBLIC GLuint
-_glapi_get_dispatch_table_size(void)
-{
- return DISPATCH_TABLE_SIZE;
-}
-
-
-/**
- * Make sure there are no NULL pointers in the given dispatch table.
- * Intended for debugging purposes.
- */
-void
-_glapi_check_table_not_null(const struct _glapi_table *table)
-{
-#if 0 /* enable this for extra DEBUG */
- const GLuint entries = _glapi_get_dispatch_table_size();
- const void **tab = (const void **) table;
- GLuint i;
- for (i = 1; i < entries; i++) {
- assert(tab[i]);
- }
-#else
- (void) table;
-#endif
-}
diff --git a/src/mesa/glapi/glapi.h b/src/mesa/glapi/glapi.h
index 1ca2e4beff1..7dcf2e8910b 100644
--- a/src/mesa/glapi/glapi.h
+++ b/src/mesa/glapi/glapi.h
@@ -165,29 +165,8 @@ extern _glapi_proc
_glapi_get_proc_address(const char *funcName);
-/**
- * GL API local functions and defines
- */
-
-extern void
-init_glapi_relocs_once(void);
-
-extern void
-_glapi_check_table_not_null(const struct _glapi_table *table);
-
-
-extern void
-_glapi_check_table(const struct _glapi_table *table);
-
-
extern const char *
_glapi_get_proc_name(unsigned int offset);
-/*
- * Number of extension functions which we can dynamically add at runtime.
- */
-#define MAX_EXTENSION_FUNCS 300
-
-
#endif
diff --git a/src/mesa/glapi/glapi_entrypoint.c b/src/mesa/glapi/glapi_entrypoint.c
new file mode 100644
index 00000000000..5e6e5995f24
--- /dev/null
+++ b/src/mesa/glapi/glapi_entrypoint.c
@@ -0,0 +1,331 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.1
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file glapi_entrypoint.c
+ *
+ * Arch-specific code for manipulating GL API entrypoints (dispatch stubs).
+ */
+
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#include "glapi/mesa.h"
+#else
+#include "main/glheader.h"
+#include "main/compiler.h"
+#endif
+
+#include "glapi/glapi.h"
+#include "glapi/glapi_priv.h"
+
+
+#ifdef USE_X86_ASM
+
+#if defined( GLX_USE_TLS )
+extern GLubyte gl_dispatch_functions_start[];
+extern GLubyte gl_dispatch_functions_end[];
+#else
+extern const GLubyte gl_dispatch_functions_start[];
+#endif
+
+#endif /* USE_X86_ASM */
+
+
+#if defined(DISPATCH_FUNCTION_SIZE)
+
+/**
+ * Return the address of the static dispatch stub for the given slot:
+ * gl_dispatch_functions_start advanced by functionOffset stubs of
+ * DISPATCH_FUNCTION_SIZE bytes each.
+ */
+_glapi_proc
+get_entrypoint_address(GLuint functionOffset)
+{
+   const GLubyte *stub = gl_dispatch_functions_start;
+
+   stub += DISPATCH_FUNCTION_SIZE * functionOffset;
+   return (_glapi_proc) stub;
+}
+
+#endif
+
+
+#if defined(PTHREADS) || defined(GLX_USE_TLS)
+
+/**
+ * Perform platform-specific GL API entry-point fixups.
+ *
+ * On x86 with TLS (and writable text) the start of every static dispatch
+ * stub is overwritten with a %gs-relative load whose offset comes from
+ * _x86_get_dispatch().  On SPARC the shared stub is filled in from an
+ * instruction template, with the dispatch address (and, for the pthread
+ * variant, the call target) merged into the instruction words.
+ * In any other configuration the function body is empty.
+ */
+static void
+init_glapi_relocs( void )
+{
+#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT)
+ extern unsigned long _x86_get_dispatch(void);
+ /* Patch bytes; the four zero bytes are replaced through 'offset' below. */
+ char run_time_patch[] = {
+ 0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */
+ };
+ GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */
+ const GLubyte * const get_disp = (const GLubyte *) run_time_patch;
+ GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start;
+
+ *offset = _x86_get_dispatch();
+ /* Copy the patch over the first bytes of each stub in turn. */
+ while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) {
+ (void) memcpy( curr_func, get_disp, sizeof(run_time_patch));
+ curr_func += DISPATCH_FUNCTION_SIZE;
+ }
+#endif
+#ifdef USE_SPARC_ASM
+ extern void __glapi_sparc_icache_flush(unsigned int *);
+ /* Instruction words for the shared stub; address-dependent fields are
+  * OR-ed in when the words are copied into the stub below.
+  */
+ static const unsigned int template[] = {
+#ifdef GLX_USE_TLS
+ 0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */
+ 0x8730e00a, /* srl %g3, 10, %g3 */
+ 0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */
+#ifdef __arch64__
+ 0xc259c002, /* ldx [%g7 + %g2], %g1 */
+ 0xc2584003, /* ldx [%g1 + %g3], %g1 */
+#else
+ 0xc201c002, /* ld [%g7 + %g2], %g1 */
+ 0xc2004003, /* ld [%g1 + %g3], %g1 */
+#endif
+ 0x81c04000, /* jmp %g1 */
+ 0x01000000, /* nop */
+#else
+#ifdef __arch64__
+ 0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */
+ 0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */
+ 0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */
+ 0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */
+ 0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */
+ 0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */
+ 0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */
+#else
+ 0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */
+ 0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */
+ 0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */
+#endif
+ 0x80a06000, /* --> cmp %g1, 0 */
+ 0x02800005, /* --> be +4*5 */
+ 0x01000000, /* --> nop */
+#ifdef __arch64__
+ 0xc2584003, /* 64-bit --> ldx [%g1 + %g3], %g1 */
+#else
+ 0xc2004003, /* 32-bit --> ld [%g1 + %g3], %g1 */
+#endif
+ 0x81c04000, /* --> jmp %g1 */
+ 0x01000000, /* --> nop */
+#ifdef __arch64__
+ 0x9de3bf80, /* 64-bit --> save %sp, -128, %sp */
+#else
+ 0x9de3bfc0, /* 32-bit --> save %sp, -64, %sp */
+#endif
+ 0xa0100003, /* --> mov %g3, %l0 */
+ 0x40000000, /* --> call _glapi_get_dispatch */
+ 0x01000000, /* --> nop */
+ 0x82100008, /* --> mov %o0, %g1 */
+ 0x86100010, /* --> mov %l0, %g3 */
+ 0x10bffff7, /* --> ba -4*9 */
+ 0x81e80000, /* --> restore */
+#endif
+ };
+ /* 'code' is the stub being written; 'dispatch' is the value whose bits
+  * are merged into the sethi/or/ld immediates.
+  */
+#ifdef GLX_USE_TLS
+ extern unsigned int __glapi_sparc_tls_stub;
+ extern unsigned long __glapi_sparc_get_dispatch(void);
+ unsigned int *code = &__glapi_sparc_tls_stub;
+ unsigned long dispatch = __glapi_sparc_get_dispatch();
+#else
+ extern unsigned int __glapi_sparc_pthread_stub;
+ unsigned int *code = &__glapi_sparc_pthread_stub;
+ unsigned long dispatch = (unsigned long) &_glapi_Dispatch;
+ unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch;
+ int idx;
+#endif
+
+ /* Each __glapi_sparc_icache_flush() call below follows a group of
+  * freshly written instruction words.
+  */
+#if defined(GLX_USE_TLS)
+ code[0] = template[0] | (dispatch >> 10);
+ code[1] = template[1];
+ __glapi_sparc_icache_flush(&code[0]);
+ code[2] = template[2] | (dispatch & 0x3ff);
+ code[3] = template[3];
+ __glapi_sparc_icache_flush(&code[2]);
+ code[4] = template[4];
+ code[5] = template[5];
+ __glapi_sparc_icache_flush(&code[4]);
+ code[6] = template[6];
+ __glapi_sparc_icache_flush(&code[6]);
+#else
+#if defined(__arch64__)
+ code[0] = template[0] | (dispatch >> (32 + 10));
+ code[1] = template[1] | ((dispatch & 0xffffffff) >> 10);
+ __glapi_sparc_icache_flush(&code[0]);
+ code[2] = template[2] | ((dispatch >> 32) & 0x3ff);
+ code[3] = template[3];
+ __glapi_sparc_icache_flush(&code[2]);
+ code[4] = template[4];
+ code[5] = template[5];
+ __glapi_sparc_icache_flush(&code[4]);
+ code[6] = template[6] | (dispatch & 0x3ff);
+ idx = 7;
+#else
+ code[0] = template[0] | (dispatch >> 10);
+ code[1] = template[1];
+ __glapi_sparc_icache_flush(&code[0]);
+ code[2] = template[2] | (dispatch & 0x3ff);
+ idx = 3;
+#endif
+ /* From here on the 32-bit and 64-bit sequences are the same; idx is the
+  * index of the first common word (the cmp).
+  */
+ code[idx + 0] = template[idx + 0];
+ __glapi_sparc_icache_flush(&code[idx - 1]);
+ code[idx + 1] = template[idx + 1];
+ code[idx + 2] = template[idx + 2];
+ __glapi_sparc_icache_flush(&code[idx + 1]);
+ code[idx + 3] = template[idx + 3];
+ code[idx + 4] = template[idx + 4];
+ __glapi_sparc_icache_flush(&code[idx + 3]);
+ code[idx + 5] = template[idx + 5];
+ code[idx + 6] = template[idx + 6];
+ __glapi_sparc_icache_flush(&code[idx + 5]);
+ code[idx + 7] = template[idx + 7];
+ /* The call displacement is the word distance from the call instruction
+  * itself to call_dest.
+  */
+ code[idx + 8] = template[idx + 8] |
+ (((call_dest - ((unsigned long) &code[idx + 8]))
+ >> 2) & 0x3fffffff);
+ __glapi_sparc_icache_flush(&code[idx + 7]);
+ code[idx + 9] = template[idx + 9];
+ code[idx + 10] = template[idx + 10];
+ __glapi_sparc_icache_flush(&code[idx + 9]);
+ code[idx + 11] = template[idx + 11];
+ code[idx + 12] = template[idx + 12];
+ __glapi_sparc_icache_flush(&code[idx + 11]);
+ code[idx + 13] = template[idx + 13];
+ __glapi_sparc_icache_flush(&code[idx + 13]);
+#endif
+#endif
+}
+
+/**
+ * Run init_glapi_relocs() through pthread_once(), so the fixups are
+ * applied a single time no matter how often this is called.
+ */
+void
+init_glapi_relocs_once(void)
+{
+   static pthread_once_t relocs_once = PTHREAD_ONCE_INIT;
+
+   pthread_once(&relocs_once, init_glapi_relocs);
+}
+
+#else
+
+/**
+ * Variant for builds that take this preprocessor branch: there are no
+ * entry-point fixups to apply.
+ */
+void
+init_glapi_relocs_once(void)
+{
+   /* nothing to do */
+}
+
+#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */
+
+
+#ifdef USE_SPARC_ASM
+extern void __glapi_sparc_icache_flush(unsigned int *);
+#endif
+
+/**
+ * Generate a dispatch function (entrypoint) which jumps through
+ * the given slot number (offset) in the current dispatch table.
+ * We need assembly language in order to accomplish this.
+ *
+ * \param functionOffset  slot number to encode into the new stub.
+ * \return the newly allocated stub, or NULL when the allocation fails or
+ *         when no stub can be generated in this build configuration.
+ */
+_glapi_proc
+generate_entrypoint(GLuint functionOffset)
+{
+#if defined(USE_X86_ASM)
+   /* 32 is chosen as something of a magic offset.  For x86, the dispatch
+    * at offset 32 is the first one where the offset in the
+    * "jmp OFFSET*4(%eax)" can't be encoded in a single byte.
+    */
+   const GLubyte * const template_func = gl_dispatch_functions_start
+     + (DISPATCH_FUNCTION_SIZE * 32);
+   GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE);
+
+
+   if ( code != NULL ) {
+      /* Clone the template stub, then rewrite its table offset. */
+      (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE);
+      fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset );
+   }
+
+   return (_glapi_proc) code;
+#elif defined(USE_SPARC_ASM)
+
+#if defined(PTHREADS) || defined(GLX_USE_TLS)
+   static const unsigned int template[] = {
+      0x07000000, /* sethi %hi(0), %g3 */
+      0x8210000f, /* mov %o7, %g1 */
+      0x40000000, /* call */
+      0x9e100001, /* mov %g1, %o7 */
+   };
+#ifdef GLX_USE_TLS
+   extern unsigned int __glapi_sparc_tls_stub;
+   unsigned long call_dest = (unsigned long ) &__glapi_sparc_tls_stub;
+#else
+   extern unsigned int __glapi_sparc_pthread_stub;
+   unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub;
+#endif
+   unsigned int *code = (unsigned int *) malloc(sizeof(template));
+   if (code) {
+      code[0] = template[0] | (functionOffset & 0x3fffff);
+      code[1] = template[1];
+      __glapi_sparc_icache_flush(&code[0]);
+      /* Word displacement from the call instruction to the shared stub. */
+      code[2] = template[2] |
+         (((call_dest - ((unsigned long) &code[2]))
+           >> 2) & 0x3fffffff);
+      code[3] = template[3];
+      __glapi_sparc_icache_flush(&code[2]);
+   }
+   return (_glapi_proc) code;
+#else
+   /* No stub is generated in this configuration; report failure rather
+    * than falling off the end of a non-void function.
+    */
+   (void) functionOffset;
+   return NULL;
+#endif
+
+#else
+   (void) functionOffset;
+   return NULL;
+#endif /* USE_*_ASM */
+}
+
+
+/**
+ * This function inserts a new dispatch offset into an assembly language
+ * stub such as the ones produced by generate_entrypoint().
+ *
+ * \param entrypoint  stub to patch.
+ * \param offset      dispatch table slot number; it is scaled to a byte
+ *                    offset before being written into the stub.
+ */
+void
+fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset)
+{
+#if defined(USE_X86_ASM)
+   GLubyte * const code = (GLubyte *) entrypoint;
+   const unsigned int disp = 4 * offset;
+
+   /* The displacement fields sit at odd byte offsets inside the stub, so
+    * they are written with memcpy instead of through a cast pointer.
+    */
+#if DISPATCH_FUNCTION_SIZE == 32
+   (void) memcpy(code + 11, &disp, sizeof(disp));
+   (void) memcpy(code + 22, &disp, sizeof(disp));
+#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS )
+   (void) memcpy(code + 8, &disp, sizeof(disp));
+#elif DISPATCH_FUNCTION_SIZE == 16
+   (void) memcpy(code + 7, &disp, sizeof(disp));
+#else
+# error Invalid DISPATCH_FUNCTION_SIZE!
+#endif
+
+#elif defined(USE_SPARC_ASM)
+   unsigned int *code = (unsigned int *) entrypoint;
+   /* Replace the 22-bit immediate of the first instruction word. */
+   code[0] &= ~0x3fffff;
+   code[0] |= (offset * sizeof(void *)) & 0x3fffff;
+   __glapi_sparc_icache_flush(&code[0]);
+#else
+
+   /* an unimplemented architecture */
+   (void) entrypoint;
+   (void) offset;
+
+#endif /* USE_*_ASM */
+}
diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c
index a6dbf173e82..46b466920b6 100644
--- a/src/mesa/glapi/glapi_getproc.c
+++ b/src/mesa/glapi/glapi_getproc.c
@@ -39,21 +39,12 @@
#endif
#include "glapi/glapi.h"
-#include "glapi/glapioffsets.h"
+#include "glapi/glapi_priv.h"
#include "glapi/glapitable.h"
+#include "glapi/glapioffsets.h"
-#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
-# define DISPATCH_FUNCTION_SIZE 16
-#elif defined(USE_X86_ASM)
-# if defined(THREADS) && !defined(GLX_USE_TLS)
-# define DISPATCH_FUNCTION_SIZE 32
-# else
-# define DISPATCH_FUNCTION_SIZE 16
-# endif
-#endif
-
-#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server) && !defined(XGLServer)
+#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server)
# define NEED_FUNCTION_POINTER
#endif
@@ -72,7 +63,7 @@ find_entry( const char * n )
for (i = 0; static_functions[i].Name_offset >= 0; i++) {
const char *testName = gl_string_table + static_functions[i].Name_offset;
#ifdef MANGLE
- /* skip the "m" prefix on the name */
+ /* skip the prefix on the name */
if (strcmp(testName, n + 1) == 0)
#else
if (strcmp(testName, n) == 0)
@@ -100,19 +91,7 @@ get_static_proc_offset(const char *funcName)
}
-#ifdef USE_X86_ASM
-
-#if defined( GLX_USE_TLS )
-extern GLubyte gl_dispatch_functions_start[];
-extern GLubyte gl_dispatch_functions_end[];
-#else
-extern const GLubyte gl_dispatch_functions_start[];
-#endif
-
-#endif /* USE_X86_ASM */
-
-
-#if !defined(XFree86Server) && !defined(XGLServer)
+#if !defined(XFree86Server)
/**
* Return dispatch function address for the named static (built-in) function.
@@ -125,12 +104,10 @@ get_static_proc_address(const char *funcName)
if (f) {
#if defined(DISPATCH_FUNCTION_SIZE) && defined(GLX_INDIRECT_RENDERING)
return (f->Address == NULL)
- ? (_glapi_proc) (gl_dispatch_functions_start
- + (DISPATCH_FUNCTION_SIZE * f->Offset))
+ ? get_entrypoint_address(f->Offset)
: f->Address;
#elif defined(DISPATCH_FUNCTION_SIZE)
- return (_glapi_proc) (gl_dispatch_functions_start
- + (DISPATCH_FUNCTION_SIZE * f->Offset));
+ return get_entrypoint_address(f->Offset);
#else
return f->Address;
#endif
@@ -140,7 +117,7 @@ get_static_proc_address(const char *funcName)
}
}
-#endif /* !defined(XFree86Server) && !defined(XGLServer) */
+#endif /* !defined(XFree86Server) */
@@ -162,172 +139,6 @@ get_static_proc_name( GLuint offset )
-#if defined(PTHREADS) || defined(GLX_USE_TLS)
-
-/**
- * Perform platform-specific GL API entry-point fixups.
- */
-static void
-init_glapi_relocs( void )
-{
-#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT)
- extern unsigned long _x86_get_dispatch(void);
- char run_time_patch[] = {
- 0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */
- };
- GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */
- const GLubyte * const get_disp = (const GLubyte *) run_time_patch;
- GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start;
-
- *offset = _x86_get_dispatch();
- while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) {
- (void) memcpy( curr_func, get_disp, sizeof(run_time_patch));
- curr_func += DISPATCH_FUNCTION_SIZE;
- }
-#endif
-#ifdef USE_SPARC_ASM
- extern void __glapi_sparc_icache_flush(unsigned int *);
- static const unsigned int template[] = {
-#ifdef GLX_USE_TLS
- 0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */
- 0x8730e00a, /* srl %g3, 10, %g3 */
- 0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */
-#ifdef __arch64__
- 0xc259c002, /* ldx [%g7 + %g2], %g1 */
- 0xc2584003, /* ldx [%g1 + %g3], %g1 */
-#else
- 0xc201c002, /* ld [%g7 + %g2], %g1 */
- 0xc2004003, /* ld [%g1 + %g3], %g1 */
-#endif
- 0x81c04000, /* jmp %g1 */
- 0x01000000, /* nop */
-#else
-#ifdef __arch64__
- 0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */
- 0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */
- 0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */
- 0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */
- 0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */
- 0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */
- 0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */
-#else
- 0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */
- 0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */
- 0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */
-#endif
- 0x80a06000, /* --> cmp %g1, 0 */
- 0x02800005, /* --> be +4*5 */
- 0x01000000, /* --> nop */
-#ifdef __arch64__
- 0xc2584003, /* 64-bit --> ldx [%g1 + %g3], %g1 */
-#else
- 0xc2004003, /* 32-bit --> ld [%g1 + %g3], %g1 */
-#endif
- 0x81c04000, /* --> jmp %g1 */
- 0x01000000, /* --> nop */
-#ifdef __arch64__
- 0x9de3bf80, /* 64-bit --> save %sp, -128, %sp */
-#else
- 0x9de3bfc0, /* 32-bit --> save %sp, -64, %sp */
-#endif
- 0xa0100003, /* --> mov %g3, %l0 */
- 0x40000000, /* --> call _glapi_get_dispatch */
- 0x01000000, /* --> nop */
- 0x82100008, /* --> mov %o0, %g1 */
- 0x86100010, /* --> mov %l0, %g3 */
- 0x10bffff7, /* --> ba -4*9 */
- 0x81e80000, /* --> restore */
-#endif
- };
-#ifdef GLX_USE_TLS
- extern unsigned int __glapi_sparc_tls_stub;
- extern unsigned long __glapi_sparc_get_dispatch(void);
- unsigned int *code = &__glapi_sparc_tls_stub;
- unsigned long dispatch = __glapi_sparc_get_dispatch();
-#else
- extern unsigned int __glapi_sparc_pthread_stub;
- unsigned int *code = &__glapi_sparc_pthread_stub;
- unsigned long dispatch = (unsigned long) &_glapi_Dispatch;
- unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch;
- int idx;
-#endif
-
-#if defined(GLX_USE_TLS)
- code[0] = template[0] | (dispatch >> 10);
- code[1] = template[1];
- __glapi_sparc_icache_flush(&code[0]);
- code[2] = template[2] | (dispatch & 0x3ff);
- code[3] = template[3];
- __glapi_sparc_icache_flush(&code[2]);
- code[4] = template[4];
- code[5] = template[5];
- __glapi_sparc_icache_flush(&code[4]);
- code[6] = template[6];
- __glapi_sparc_icache_flush(&code[6]);
-#else
-#if defined(__arch64__)
- code[0] = template[0] | (dispatch >> (32 + 10));
- code[1] = template[1] | ((dispatch & 0xffffffff) >> 10);
- __glapi_sparc_icache_flush(&code[0]);
- code[2] = template[2] | ((dispatch >> 32) & 0x3ff);
- code[3] = template[3];
- __glapi_sparc_icache_flush(&code[2]);
- code[4] = template[4];
- code[5] = template[5];
- __glapi_sparc_icache_flush(&code[4]);
- code[6] = template[6] | (dispatch & 0x3ff);
- idx = 7;
-#else
- code[0] = template[0] | (dispatch >> 10);
- code[1] = template[1];
- __glapi_sparc_icache_flush(&code[0]);
- code[2] = template[2] | (dispatch & 0x3ff);
- idx = 3;
-#endif
- code[idx + 0] = template[idx + 0];
- __glapi_sparc_icache_flush(&code[idx - 1]);
- code[idx + 1] = template[idx + 1];
- code[idx + 2] = template[idx + 2];
- __glapi_sparc_icache_flush(&code[idx + 1]);
- code[idx + 3] = template[idx + 3];
- code[idx + 4] = template[idx + 4];
- __glapi_sparc_icache_flush(&code[idx + 3]);
- code[idx + 5] = template[idx + 5];
- code[idx + 6] = template[idx + 6];
- __glapi_sparc_icache_flush(&code[idx + 5]);
- code[idx + 7] = template[idx + 7];
- code[idx + 8] = template[idx + 8] |
- (((call_dest - ((unsigned long) &code[idx + 8]))
- >> 2) & 0x3fffffff);
- __glapi_sparc_icache_flush(&code[idx + 7]);
- code[idx + 9] = template[idx + 9];
- code[idx + 10] = template[idx + 10];
- __glapi_sparc_icache_flush(&code[idx + 9]);
- code[idx + 11] = template[idx + 11];
- code[idx + 12] = template[idx + 12];
- __glapi_sparc_icache_flush(&code[idx + 11]);
- code[idx + 13] = template[idx + 13];
- __glapi_sparc_icache_flush(&code[idx + 13]);
-#endif
-#endif
-}
-
-void
-init_glapi_relocs_once( void )
-{
- static pthread_once_t once_control = PTHREAD_ONCE_INIT;
- pthread_once( & once_control, init_glapi_relocs );
-}
-
-#else
-
-void
-init_glapi_relocs_once( void ) { }
-
-#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */
-
-
-
/**********************************************************************
* Extension function management.
*/
@@ -378,111 +189,14 @@ struct _glapi_function {
};
-static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
-static GLuint NumExtEntryPoints = 0;
-
-#ifdef USE_SPARC_ASM
-extern void __glapi_sparc_icache_flush(unsigned int *);
-#endif
-
-static void
-fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset);
-
-/**
- * Generate a dispatch function (entrypoint) which jumps through
- * the given slot number (offset) in the current dispatch table.
- * We need assembly language in order to accomplish this.
- */
-static _glapi_proc
-generate_entrypoint(GLuint functionOffset)
-{
-#if defined(USE_X86_ASM)
- /* 32 is chosen as something of a magic offset. For x86, the dispatch
- * at offset 32 is the first one where the offset in the
- * "jmp OFFSET*4(%eax)" can't be encoded in a single byte.
- */
- const GLubyte * const template_func = gl_dispatch_functions_start
- + (DISPATCH_FUNCTION_SIZE * 32);
- GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE);
-
-
- if ( code != NULL ) {
- (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE);
- fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset );
- }
-
- return (_glapi_proc) code;
-#elif defined(USE_SPARC_ASM)
-
-#if defined(PTHREADS) || defined(GLX_USE_TLS)
- static const unsigned int template[] = {
- 0x07000000, /* sethi %hi(0), %g3 */
- 0x8210000f, /* mov %o7, %g1 */
- 0x40000000, /* call */
- 0x9e100001, /* mov %g1, %o7 */
- };
-#ifdef GLX_USE_TLS
- extern unsigned int __glapi_sparc_tls_stub;
- unsigned long call_dest = (unsigned long ) &__glapi_sparc_tls_stub;
-#else
- extern unsigned int __glapi_sparc_pthread_stub;
- unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub;
-#endif
- unsigned int *code = (unsigned int *) malloc(sizeof(template));
- if (code) {
- code[0] = template[0] | (functionOffset & 0x3fffff);
- code[1] = template[1];
- __glapi_sparc_icache_flush(&code[0]);
- code[2] = template[2] |
- (((call_dest - ((unsigned long) &code[2]))
- >> 2) & 0x3fffffff);
- code[3] = template[3];
- __glapi_sparc_icache_flush(&code[2]);
- }
- return (_glapi_proc) code;
-#endif
-
-#else
- (void) functionOffset;
- return NULL;
-#endif /* USE_*_ASM */
-}
-
-
-/**
- * This function inserts a new dispatch offset into the assembly language
- * stub that was generated with the preceeding function.
+/*
+ * Number of extension functions which we can dynamically add at runtime.
*/
-static void
-fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset)
-{
-#if defined(USE_X86_ASM)
- GLubyte * const code = (GLubyte *) entrypoint;
-
-#if DISPATCH_FUNCTION_SIZE == 32
- *((unsigned int *)(code + 11)) = 4 * offset;
- *((unsigned int *)(code + 22)) = 4 * offset;
-#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS )
- *((unsigned int *)(code + 8)) = 4 * offset;
-#elif DISPATCH_FUNCTION_SIZE == 16
- *((unsigned int *)(code + 7)) = 4 * offset;
-#else
-# error Invalid DISPATCH_FUNCTION_SIZE!
-#endif
-
-#elif defined(USE_SPARC_ASM)
- unsigned int *code = (unsigned int *) entrypoint;
- code[0] &= ~0x3fffff;
- code[0] |= (offset * sizeof(void *)) & 0x3fffff;
- __glapi_sparc_icache_flush(&code[0]);
-#else
+#define MAX_EXTENSION_FUNCS 300
- /* an unimplemented architecture */
- (void) entrypoint;
- (void) offset;
-#endif /* USE_*_ASM */
-}
+static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
+static GLuint NumExtEntryPoints = 0;
/**
@@ -710,7 +424,8 @@ _glapi_get_proc_address(const char *funcName)
GLuint i;
#ifdef MANGLE
- if (funcName[0] != 'm' || funcName[1] != 'g' || funcName[2] != 'l')
+ /* skip the prefix on the name */
+ if (funcName[1] != 'g' || funcName[2] != 'l')
return NULL;
#else
if (funcName[0] != 'g' || funcName[1] != 'l')
@@ -724,7 +439,7 @@ _glapi_get_proc_address(const char *funcName)
}
}
-#if !defined( XFree86Server ) && !defined( XGLServer )
+#if !defined( XFree86Server )
/* search static functions */
{
const _glapi_proc func = get_static_proc_address(funcName);
@@ -766,6 +481,51 @@ _glapi_get_proc_name(GLuint offset)
+/**********************************************************************
+ * GL API table functions.
+ */
+
+
+/*
+ * The dispatch table size (number of entries) is the size of the
+ * _glapi_table struct plus the number of dynamic entries we can add.
+ * The extra slots can be filled in by DRI drivers that register new extension
+ * functions.
+ */
+#define DISPATCH_TABLE_SIZE (sizeof(struct _glapi_table) / sizeof(void *) + MAX_EXTENSION_FUNCS)
+
+
+/**
+ * Return size of dispatch table struct as number of functions (or
+ * slots).
+ */
+PUBLIC GLuint
+_glapi_get_dispatch_table_size(void)
+{
+ return DISPATCH_TABLE_SIZE;
+}
+
+
+/**
+ * Make sure there are no NULL pointers in the given dispatch table.
+ * Intended for debugging purposes.
+ */
+void
+_glapi_check_table_not_null(const struct _glapi_table *table)
+{
+#ifdef EXTRA_DEBUG /* change EXTRA_DEBUG to DEBUG for extra debug checks */
+ const GLuint entries = _glapi_get_dispatch_table_size();
+ const void **tab = (const void **) table;
+ GLuint i;
+ for (i = 1; i < entries; i++) {
+ assert(tab[i]);
+ }
+#else
+ (void) table;
+#endif
+}
+
+
/**
* Do some spot checks to be sure that the dispatch table
* slots are assigned correctly. For debugging only.
@@ -773,7 +533,7 @@ _glapi_get_proc_name(GLuint offset)
void
_glapi_check_table(const struct _glapi_table *table)
{
-#if 0 /* enable this for extra DEBUG */
+#ifdef EXTRA_DEBUG /* change EXTRA_DEBUG to DEBUG for extra debug checks */
{
GLuint BeginOffset = _glapi_get_proc_offset("glBegin");
char *BeginFunc = (char*) &table->Begin;
diff --git a/src/mesa/glapi/glapi_priv.h b/src/mesa/glapi/glapi_priv.h
new file mode 100644
index 00000000000..7cd81ee8dca
--- /dev/null
+++ b/src/mesa/glapi/glapi_priv.h
@@ -0,0 +1,66 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.1
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _GLAPI_PRIV_H
+#define _GLAPI_PRIV_H
+
+#include "glthread.h"
+
+extern void
+_glapi_check_table_not_null(const struct _glapi_table *table);
+
+
+extern void
+_glapi_check_table(const struct _glapi_table *table);
+
+
+extern void
+init_glapi_relocs_once(void);
+
+
+extern _glapi_proc
+generate_entrypoint(GLuint functionOffset);
+
+
+extern void
+fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset);
+
+
+extern _glapi_proc
+get_entrypoint_address(GLuint functionOffset);
+
+
+#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
+# define DISPATCH_FUNCTION_SIZE 16
+#elif defined(USE_X86_ASM)
+# if defined(THREADS) && !defined(GLX_USE_TLS)
+# define DISPATCH_FUNCTION_SIZE 32
+# else
+# define DISPATCH_FUNCTION_SIZE 16
+# endif
+#endif
+
+
+#endif
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 9f2e4e51575..74885548e5a 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -88,6 +88,7 @@ MAIN_SOURCES = \
GLAPI_SOURCES = \
glapi/glapi.c \
glapi/glapi_dispatch.c \
+ glapi/glapi_entrypoint.c \
glapi/glapi_getproc.c \
glapi/glapi_nop.c \
glapi/glthread.c
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 0332d4dbdfe..f326601c3be 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -440,6 +440,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
cso_save_viewport(cso);
cso_save_fragment_shader(cso);
cso_save_vertex_shader(cso);
+ cso_save_vertex_elements(cso);
/* rasterizer state: just scissor */
st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
@@ -490,6 +491,8 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
cso_set_viewport(cso, &vp);
}
+ cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
 /* convert Z from [0,1] to [-1,1] to match viewport Z scale/bias */
z = z * 2.0 - 1.0;
@@ -509,6 +512,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
cso_restore_viewport(cso);
cso_restore_fragment_shader(cso);
cso_restore_vertex_shader(cso);
+ cso_restore_vertex_elements(cso);
}
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 9e66eed3634..de86062fc40 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -213,6 +213,7 @@ clear_with_quad(GLcontext *ctx,
cso_save_clip(st->cso_context);
cso_save_fragment_shader(st->cso_context);
cso_save_vertex_shader(st->cso_context);
+ cso_save_vertex_elements(st->cso_context);
/* blend state: RGBA masking */
{
@@ -264,6 +265,8 @@ clear_with_quad(GLcontext *ctx,
cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
}
+ cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
+
cso_set_rasterizer(st->cso_context, &st->clear.raster);
/* viewport state: viewport matching window dims */
@@ -297,6 +300,8 @@ clear_with_quad(GLcontext *ctx,
cso_restore_clip(st->cso_context);
cso_restore_fragment_shader(st->cso_context);
cso_restore_vertex_shader(st->cso_context);
+ cso_restore_vertex_elements(st->cso_context);
+
}
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 98453321f86..c609435a158 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -529,6 +529,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
cso_save_sampler_textures(cso);
cso_save_fragment_shader(cso);
cso_save_vertex_shader(cso);
+ cso_save_vertex_elements(cso);
/* rasterizer state: just scissor */
{
@@ -581,6 +582,8 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
cso_set_viewport(cso, &vp);
}
+ cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
/* texture state: */
if (st->pixel_xfer.pixelmap_enabled) {
struct pipe_texture *textures[2];
@@ -615,6 +618,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
cso_restore_sampler_textures(cso);
cso_restore_fragment_shader(cso);
cso_restore_vertex_shader(cso);
+ cso_restore_vertex_elements(cso);
}
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index de8beaf5e25..0358a707261 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -141,6 +141,14 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe )
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
st->state.sampler_list[i] = &st->state.samplers[i];
+ for (i = 0; i < 3; i++) {
+ memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element));
+ st->velems_util_draw[i].src_offset = i * 4 * sizeof(float);
+ st->velems_util_draw[i].instance_divisor = 0;
+ st->velems_util_draw[i].vertex_buffer_index = 0;
+ st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+
/* we want all vertex data to be placed in buffer objects */
vbo_use_buffer_objects(ctx);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 045c029c305..6622361a7e3 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -174,6 +174,9 @@ struct st_context
unsigned vbuf_slot;
} clear;
+ /** used for anything using util_draw_vertex_buffer */
+ struct pipe_vertex_element velems_util_draw[3];
+
void *passthrough_fs; /**< simple pass-through frag shader */
struct gen_mipmap_state *gen_mipmap;
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 32b9a473cfc..8a6e1ed4662 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -57,6 +57,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "cso_cache/cso_context.h"
static GLuint double_types[4] = {
@@ -368,7 +369,6 @@ setup_interleaved_attribs(GLcontext *ctx,
(unsigned) (arrays[mesaAttr]->Ptr - offset0);
velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = 0;
- velements[attr].nr_components = arrays[mesaAttr]->Size;
velements[attr].src_format =
st_pipe_vertex_format(arrays[mesaAttr]->Type,
arrays[mesaAttr]->Size,
@@ -458,7 +458,6 @@ setup_non_interleaved_attribs(GLcontext *ctx,
vbuffer[attr].max_index = max_index;
velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = attr;
- velements[attr].nr_components = arrays[mesaAttr]->Size;
velements[attr].src_format
= st_pipe_vertex_format(arrays[mesaAttr]->Type,
arrays[mesaAttr]->Size,
@@ -564,6 +563,7 @@ st_draw_vbo(GLcontext *ctx,
(void) check_uniforms;
#endif
+ memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
/*
* Setup the vbuffer[] and velements[] arrays.
*/
@@ -596,14 +596,13 @@ st_draw_vbo(GLcontext *ctx,
for (i = 0; i < num_velements; i++) {
printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
- printf("vlements[%d].nr_comps = %u\n", i, velements[i].nr_components);
printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
}
}
#endif
pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer);
- pipe->set_vertex_elements(pipe, num_velements, velements);
+ cso_set_vertex_elements(ctx->st->cso_context, num_velements, velements);
if (num_vbuffers == 0 || num_velements == 0)
return;
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index 087f2f22bbf..26a5b3fcd63 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -178,7 +178,6 @@ st_feedback_draw_vbo(GLcontext *ctx,
vbuffers[attr].max_index = max_index;
velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = attr;
- velements[attr].nr_components = arrays[mesaAttr]->Size;
velements[attr].src_format =
st_pipe_vertex_format(arrays[mesaAttr]->Type,
arrays[mesaAttr]->Size,