summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r--src/gallium/auxiliary/draw/draw_cliptest_tmp.h114
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c273
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h81
-rw-r--r--src/gallium/auxiliary/draw/draw_decompose_tmp.h431
-rw-r--r--src/gallium/auxiliary/draw/draw_fs.c73
-rw-r--r--src/gallium/auxiliary/draw/draw_fs.h42
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c418
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.h36
-rw-r--r--src/gallium/auxiliary/draw/draw_gs_tmp.h32
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c416
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h196
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_sample.c223
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_translate.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c227
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c30
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c16
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c134
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_cull.c19
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_flatshade.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_offset.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c26
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_stipple.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_twoside.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_unfilled.c34
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c24
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_line.c74
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c125
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h128
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c198
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h83
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_decompose.h167
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c89
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c53
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c45
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c49
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c52
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c428
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c419
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c304
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_so_emit.c293
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c13
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c193
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp.h238
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h98
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c523
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h197
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit.c208
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h309
-rw-r--r--src/gallium/auxiliary/draw/draw_so_emit_tmp.h31
-rw-r--r--src/gallium/auxiliary/draw/draw_split_tmp.h176
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h20
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c37
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h9
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c127
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_varient.c6
61 files changed, 4593 insertions, 3031 deletions
diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
new file mode 100644
index 00000000000..958ed20dc84
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
+ struct draw_vertex_info *info )
+{
+ struct vertex_header *out = info->verts;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ /* const */ float (*plane)[4] = pvs->draw->plane;
+ const unsigned pos = draw_current_shader_position_output(pvs->draw);
+ const unsigned ef = pvs->draw->vs.edgeflag_output;
+ const unsigned nr = pvs->draw->nr_planes;
+ const unsigned flags = (FLAGS);
+ unsigned need_pipeline = 0;
+ unsigned j;
+
+ for (j = 0; j < info->count; j++) {
+ float *position = out->data[pos];
+ unsigned mask = 0x0;
+
+ initialize_vertex_header(out);
+
+ if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) {
+ out->clip[0] = position[0];
+ out->clip[1] = position[1];
+ out->clip[2] = position[2];
+ out->clip[3] = position[3];
+
+ /* Do the hardwired planes first:
+ */
+ if (flags & DO_CLIP_XY) {
+ if (-position[0] + position[3] < 0) mask |= (1<<0);
+ if ( position[0] + position[3] < 0) mask |= (1<<1);
+ if (-position[1] + position[3] < 0) mask |= (1<<2);
+ if ( position[1] + position[3] < 0) mask |= (1<<3);
+ }
+
+ /* Clip Z planes according to full cube, half cube or none.
+ */
+ if (flags & DO_CLIP_FULL_Z) {
+ if ( position[2] + position[3] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+ else if (flags & DO_CLIP_HALF_Z) {
+ if ( position[2] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+
+ if (flags & DO_CLIP_USER) {
+ unsigned i;
+ for (i = 6; i < nr; i++) {
+ if (dot4(position, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+ }
+
+ out->clipmask = mask;
+ need_pipeline |= out->clipmask;
+ }
+
+ if ((flags & DO_VIEWPORT) && mask == 0)
+ {
+ /* divide by w */
+ float w = 1.0f / position[3];
+
+ /* Viewport mapping */
+ position[0] = position[0] * w * scale[0] + trans[0];
+ position[1] = position[1] * w * scale[1] + trans[1];
+ position[2] = position[2] * w * scale[2] + trans[2];
+ position[3] = w;
+ }
+
+ if ((flags & DO_EDGEFLAG) && ef) {
+ const float *edgeflag = out->data[ef];
+ out->edgeflag = !(edgeflag[0] != 1.0f);
+ need_pipeline |= !out->edgeflag;
+ }
+
+ out = (struct vertex_header *)( (char *)out + info->stride );
+ }
+
+ return need_pipeline != 0;
+}
+
+
+#undef FLAGS
+#undef TAG
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 02abddf1491..032fcbbc70a 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,12 +34,33 @@
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
+#include "draw_llvm.h"
+
+static boolean
+draw_get_option_use_llvm(void)
+{
+ static boolean first = TRUE;
+ static boolean value;
+ if (first) {
+ first = FALSE;
+ value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
+
+#ifdef PIPE_ARCH_X86
+ util_cpu_detect();
+ /* require SSE2 due to LLVM PR6960. */
+ if (!util_cpu_caps.has_sse2)
+ value = FALSE;
+#endif
+ }
+ return value;
+}
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -49,9 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
goto fail;
#if HAVE_LLVM
- lp_build_init();
- assert(lp_build_engine);
- draw->engine = lp_build_engine;
+ if(draw_get_option_use_llvm())
+ {
+ lp_build_init();
+ assert(lp_build_engine);
+ draw->engine = lp_build_engine;
+ draw->llvm = draw_llvm_create(draw);
+ }
#endif
if (!draw_init(draw))
@@ -81,6 +106,8 @@ boolean draw_init(struct draw_context *draw)
ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
+ draw->clip_xy = 1;
+ draw->clip_z = 1;
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
@@ -132,6 +159,10 @@ void draw_destroy( struct draw_context *draw )
draw_pt_destroy( draw );
draw_vs_destroy( draw );
draw_gs_destroy( draw );
+#ifdef HAVE_LLVM
+ if(draw->llvm)
+ draw_llvm_destroy( draw->llvm );
+#endif
FREE( draw );
}
@@ -157,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
}
+static void update_clip_flags( struct draw_context *draw )
+{
+ draw->clip_xy = !draw->driver.bypass_clip_xy;
+ draw->clip_z = (!draw->driver.bypass_clip_z &&
+ !draw->depth_clamp);
+ draw->clip_user = (draw->nr_planes > 6);
+}
+
/**
* Register new primitive rasterization/rendering state.
* This causes the drawing pipeline to be rebuilt.
@@ -171,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw->rasterizer = raster;
draw->rast_handle = rast_handle;
- draw->bypass_clipping = draw->driver.bypass_clipping;
- }
+ }
}
-
+/* With a little more work, llvmpipe will be able to turn this off and
+ * do its own x/y clipping.
+ *
+ * Some hardware can turn off clipping altogether - in particular any
+ * hardware with a TNL unit can do its own clipping, even if it is
+ * relying on the draw module for some other reason.
+ */
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping )
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z )
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->driver.bypass_clipping = bypass_clipping;
- draw->bypass_clipping = draw->driver.bypass_clipping;
+ draw->driver.bypass_clip_xy = bypass_clip_xy;
+ draw->driver.bypass_clip_z = bypass_clip_z;
+ update_clip_flags(draw);
}
@@ -211,6 +257,9 @@ void draw_set_clip_state( struct draw_context *draw,
assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
draw->nr_planes = 6 + clip->nr;
+ draw->depth_clamp = clip->depth_clamp;
+
+ update_clip_flags(draw);
}
@@ -282,12 +331,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
shader_type == PIPE_SHADER_GEOMETRY);
debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS);
- if (shader_type == PIPE_SHADER_VERTEX) {
+ switch (shader_type) {
+ case PIPE_SHADER_VERTEX:
draw->pt.user.vs_constants[slot] = buffer;
+ draw->pt.user.vs_constants_size[slot] = size;
draw_vs_set_constants(draw, slot, buffer, size);
- } else if (shader_type == PIPE_SHADER_GEOMETRY) {
+ break;
+ case PIPE_SHADER_GEOMETRY:
draw->pt.user.gs_constants[slot] = buffer;
+ draw->pt.user.gs_constants_size[slot] = size;
draw_gs_set_constants(draw, slot, buffer, size);
+ break;
+ default:
+ assert(0 && "invalid shader type in draw_set_mapped_constant_buffer");
}
}
@@ -357,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
}
+
+/**
+ * Allocate an extra vertex/geometry shader vertex attribute.
+ * This is used by some of the optional draw module stages such
+ * as wide_point which may need to allocate additional generic/texcoord
+ * attributes.
+ */
+int
+draw_alloc_extra_vertex_attrib(struct draw_context *draw,
+ uint semantic_name, uint semantic_index)
+{
+ const int num_outputs = draw_current_shader_outputs(draw);
+ const int n = draw->extra_shader_outputs.num;
+
+ assert(n < Elements(draw->extra_shader_outputs.semantic_name));
+
+ draw->extra_shader_outputs.semantic_name[n] = semantic_name;
+ draw->extra_shader_outputs.semantic_index[n] = semantic_index;
+ draw->extra_shader_outputs.slot[n] = num_outputs + n;
+ draw->extra_shader_outputs.num++;
+
+ return draw->extra_shader_outputs.slot[n];
+}
+
+
+/**
+ * Remove all extra vertex attributes that were allocated with
+ * draw_alloc_extra_vertex_attrib().
+ */
+void
+draw_remove_extra_vertex_attribs(struct draw_context *draw)
+{
+ draw->extra_shader_outputs.num = 0;
+}
+
+
/**
* Ask the draw module for the location/slot of the given vertex attribute in
* a post-transformed vertex.
@@ -390,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw,
return i;
}
- /* XXX there may be more than one extra vertex attrib.
- * For example, simulated gl_FragCoord and gl_PointCoord.
- */
- if (draw->extra_shader_outputs.semantic_name == semantic_name &&
- draw->extra_shader_outputs.semantic_index == semantic_index) {
- return draw->extra_shader_outputs.slot;
+ /* Search the extra vertex attributes */
+ for (i = 0; i < draw->extra_shader_outputs.num; i++) {
+ if (draw->extra_shader_outputs.semantic_name[i] == semantic_name &&
+ draw->extra_shader_outputs.semantic_index[i] == semantic_index) {
+ return draw->extra_shader_outputs.slot[i];
+ }
}
return 0;
@@ -414,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw,
uint
draw_num_shader_outputs(const struct draw_context *draw)
{
- uint count = draw->vs.vertex_shader->info.num_outputs;
+ uint count;
/* If a geometry shader is present, its outputs go to the
* driver, else the vertex shader's outputs.
*/
if (draw->gs.geometry_shader)
count = draw->gs.geometry_shader->info.num_outputs;
+ else
+ count = draw->vs.vertex_shader->info.num_outputs;
+
+ count += draw->extra_shader_outputs.num;
- if (draw->extra_shader_outputs.slot > 0)
- count++;
return count;
}
@@ -435,13 +529,18 @@ draw_num_shader_outputs(const struct draw_context *draw)
*/
void
draw_texture_samplers(struct draw_context *draw,
+ uint shader,
uint num_samplers,
struct tgsi_sampler **samplers)
{
- draw->vs.num_samplers = num_samplers;
- draw->vs.samplers = samplers;
- draw->gs.num_samplers = num_samplers;
- draw->gs.samplers = samplers;
+ if (shader == PIPE_SHADER_VERTEX) {
+ draw->vs.num_samplers = num_samplers;
+ draw->vs.samplers = samplers;
+ } else {
+ debug_assert(shader == PIPE_SHADER_GEOMETRY);
+ draw->gs.num_samplers = num_samplers;
+ draw->gs.samplers = samplers;
+ }
}
@@ -454,47 +553,28 @@ void draw_set_render( struct draw_context *draw,
}
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements )
+draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = min_index;
- draw->pt.user.max_index = max_index;
+ if (ib)
+ memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer));
+ else
+ memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer));
}
+/**
+ * Tell drawing context where to find mapped index/element buffer.
+ */
void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements )
+draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = 0;
- draw->pt.user.max_index = 0xffffffff;
+ draw->pt.user.elts = elements;
}
-
+
/* Revamp me please:
*/
void draw_do_flush( struct draw_context *draw, unsigned flags )
@@ -566,7 +646,7 @@ draw_get_rasterizer_no_cull( struct draw_context *draw,
memset(&rast, 0, sizeof(rast));
rast.scissor = scissor;
rast.flatshade = flatshade;
- rast.front_winding = PIPE_WINDING_CCW;
+ rast.front_ccw = 1;
rast.gl_rasterization_rules = draw->rasterizer->gl_rasterization_rules;
draw->rasterizer_no_cull[scissor][flatshade] =
@@ -574,3 +654,82 @@ draw_get_rasterizer_no_cull( struct draw_context *draw,
}
return draw->rasterizer_no_cull[scissor][flatshade];
}
+
+void
+draw_set_mapped_so_buffers(struct draw_context *draw,
+ void *buffers[PIPE_MAX_SO_BUFFERS],
+ unsigned num_buffers)
+{
+ int i;
+
+ for (i = 0; i < num_buffers; ++i) {
+ draw->so.buffers[i] = buffers[i];
+ }
+ draw->so.num_buffers = num_buffers;
+}
+
+void
+draw_set_so_state(struct draw_context *draw,
+ struct pipe_stream_output_state *state)
+{
+ memcpy(&draw->so.state,
+ state,
+ sizeof(struct pipe_stream_output_state));
+}
+
+void
+draw_set_sampler_views(struct draw_context *draw,
+ struct pipe_sampler_view **views,
+ unsigned num)
+{
+ unsigned i;
+
+ debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ for (i = 0; i < num; ++i)
+ draw->sampler_views[i] = views[i];
+ for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ draw->sampler_views[i] = NULL;
+
+ draw->num_sampler_views = num;
+}
+
+void
+draw_set_samplers(struct draw_context *draw,
+ struct pipe_sampler_state **samplers,
+ unsigned num)
+{
+ unsigned i;
+
+ debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ for (i = 0; i < num; ++i)
+ draw->samplers[i] = samplers[i];
+ for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ draw->samplers[i] = NULL;
+
+ draw->num_samplers = num;
+
+#ifdef HAVE_LLVM
+ if (draw->llvm)
+ draw_llvm_set_sampler_state(draw);
+#endif
+}
+
+void
+draw_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+#ifdef HAVE_LLVM
+ if(draw->llvm)
+ draw_llvm_set_mapped_texture(draw,
+ sampler_idx,
+ width, height, depth, last_level,
+ row_stride, img_stride, data);
+#endif
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index b905c2f2da6..1f27cbf488a 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -39,19 +39,24 @@
#include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
struct pipe_context;
struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
struct draw_geometry_shader;
+struct draw_fragment_shader;
struct tgsi_sampler;
+#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_context *draw_create( struct pipe_context *pipe );
void draw_destroy( struct draw_context *draw );
+void draw_flush(struct draw_context *draw);
+
void draw_set_viewport_state( struct draw_context *draw,
const struct pipe_viewport_state *viewport );
@@ -97,9 +102,27 @@ draw_num_shader_outputs(const struct draw_context *draw);
void
draw_texture_samplers(struct draw_context *draw,
+ uint shader_type,
uint num_samplers,
struct tgsi_sampler **samplers);
+void
+draw_set_sampler_views(struct draw_context *draw,
+ struct pipe_sampler_view **views,
+ unsigned num);
+void
+draw_set_samplers(struct draw_context *draw,
+ struct pipe_sampler_state **samplers,
+ unsigned num);
+
+void
+draw_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS]);
/*
@@ -116,6 +139,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
/*
+ * Fragment shader functions
+ */
+struct draw_fragment_shader *
+draw_create_fragment_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader);
+void draw_bind_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dvs);
+void draw_delete_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dvs);
+
+/*
* Geometry shader functions
*/
struct draw_geometry_shader *
@@ -139,18 +173,11 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
-void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements );
-
-void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements );
+void draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib);
+
+void draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements);
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
@@ -162,11 +189,22 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer,
unsigned size);
+void
+draw_set_mapped_so_buffers(struct draw_context *draw,
+ void *buffers[PIPE_MAX_SO_BUFFERS],
+ unsigned num_buffers);
+void
+draw_set_so_state(struct draw_context *draw,
+ struct pipe_stream_output_state *state);
+
/***********************************************************************
- * draw_prim.c
+ * draw_pt.c
*/
+void draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info);
+
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
@@ -178,8 +216,6 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount);
-void draw_flush(struct draw_context *draw);
-
/*******************************************************************************
* Driver backend interface
@@ -189,7 +225,8 @@ void draw_set_render( struct draw_context *draw,
struct vbuf_render *render );
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping );
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z );
void draw_set_force_passthrough( struct draw_context *draw,
boolean enable );
@@ -201,4 +238,16 @@ boolean draw_need_pipeline(const struct draw_context *draw,
const struct pipe_rasterizer_state *rasterizer,
unsigned prim );
+static INLINE int
+draw_get_shader_param(unsigned shader, enum pipe_cap param)
+{
+ switch(shader) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ return tgsi_exec_get_shader_param(param);
+ default:
+ return 0;
+ }
+}
+
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
new file mode 100644
index 00000000000..a142563af97
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -0,0 +1,431 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ * Chia-I Wu <[email protected]>
+ */
+
+/* these macros are optional */
+#ifndef LOCAL_VARS
+#define LOCAL_VARS
+#endif
+#ifndef FUNC_ENTER
+#define FUNC_ENTER do {} while (0)
+#endif
+#ifndef FUNC_EXIT
+#define FUNC_EXIT do {} while (0)
+#endif
+#ifndef LINE_ADJ
+#define LINE_ADJ(flags, a0, i0, i1, a1) LINE(flags, i0, i1)
+#endif
+#ifndef TRIANGLE_ADJ
+#define TRIANGLE_ADJ(flags, i0, a0, i1, a1, i2, a2) TRIANGLE(flags, i0, i1, i2)
+#endif
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned idx[6], i;
+ ushort flags;
+ LOCAL_VARS
+
+ FUNC_ENTER;
+
+ /* prim, prim_flags, count, and last_vertex_last should have been defined */
+ if (0) {
+ debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
+ __FUNCTION__, prim, prim_flags, count, last_vertex_last);
+ }
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i++) {
+ idx[0] = GET_ELT(i);
+ POINT(idx[0]);
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 0; i + 1 < count; i += 2) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ LINE(flags, idx[0], idx[1]);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ if (count >= 2) {
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
+ idx[1] = GET_ELT(0);
+ idx[2] = idx[1];
+
+ for (i = 1; i < count; i++, flags = 0) {
+ idx[0] = idx[1];
+ idx[1] = GET_ELT(i);
+ LINE(flags, idx[0], idx[1]);
+ }
+ /* close the loop */
+ if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
+ LINE(flags, idx[1], idx[2]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i + 2 < count; i += 3) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (count >= 3) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[1] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ if (last_vertex_last) {
+ for (i = 0; i + 2 < count; i++) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[2] last */
+ if (i & 1)
+ TRIANGLE(flags, idx[1], idx[0], idx[2]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ else {
+ for (i = 0; i + 2 < count; i++) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[0] first */
+ if (i & 1)
+ TRIANGLE(flags, idx[0], idx[2], idx[1]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[0] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ /* idx[0] is neither the first nor the last vertex */
+ if (last_vertex_last) {
+ for (i = 0; i + 2 < count; i++) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[2] last */
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ else {
+ for (i = 0; i + 2 < count; i++) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+ /* always emit idx[1] first */
+ TRIANGLE(flags, idx[1], idx[2], idx[0]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ if (last_vertex_last) {
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ /* always emit idx[3] last */
+ TRIANGLE(flags, idx[0], idx[1], idx[3]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ TRIANGLE(flags, idx[1], idx[2], idx[3]);
+ }
+ }
+ else {
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ /* XXX should always emit idx[0] first */
+ /* always emit idx[3] first */
+ TRIANGLE(flags, idx[3], idx[0], idx[1]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_1 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[3], idx[1], idx[2]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ if (count >= 4) {
+ idx[2] = GET_ELT(0);
+ idx[3] = GET_ELT(1);
+
+ if (last_vertex_last) {
+ for (i = 0; i + 3 < count; i += 2) {
+ idx[0] = idx[2];
+ idx[1] = idx[3];
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ /* always emit idx[3] last */
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[2], idx[0], idx[3]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ TRIANGLE(flags, idx[0], idx[1], idx[3]);
+ }
+ }
+ else {
+ for (i = 0; i + 3 < count; i += 2) {
+ idx[0] = idx[2];
+ idx[1] = idx[3];
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+
+ flags = DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1;
+ /* XXX should always emit idx[0] first */
+ /* always emit idx[3] first */
+ TRIANGLE(flags, idx[3], idx[2], idx[0]);
+
+ flags = DRAW_PIPE_EDGE_FLAG_1 |
+ DRAW_PIPE_EDGE_FLAG_2;
+ TRIANGLE(flags, idx[3], idx[0], idx[1]);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ if (count >= 3) {
+ ushort edge_next, edge_finish;
+
+ if (last_vertex_last) {
+ flags = (DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_2;
+
+ edge_next = DRAW_PIPE_EDGE_FLAG_0;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
+ }
+ else {
+ flags = (DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_1);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_0;
+
+ edge_next = DRAW_PIPE_EDGE_FLAG_1;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
+ }
+
+ idx[0] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+
+ for (i = 0; i + 2 < count; i++, flags = edge_next) {
+ idx[1] = idx[2];
+ idx[2] = GET_ELT(i + 2);
+
+ if (i + 3 == count)
+ flags |= edge_finish;
+
+ /* idx[0] is both the first and the last vertex */
+ if (last_vertex_last)
+ TRIANGLE(flags, idx[1], idx[2], idx[0]);
+ else
+ TRIANGLE(flags, idx[0], idx[1], idx[2]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_LINES_ADJACENCY:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 0; i + 3 < count; i += 4) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+ LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ if (count >= 4) {
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
+ idx[1] = GET_ELT(0);
+ idx[2] = GET_ELT(1);
+ idx[3] = GET_ELT(2);
+
+ for (i = 1; i + 2 < count; i++, flags = 0) {
+ idx[0] = idx[1];
+ idx[1] = idx[2];
+ idx[2] = idx[3];
+ idx[3] = GET_ELT(i + 2);
+ LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (i = 0; i + 5 < count; i += 6) {
+ idx[0] = GET_ELT(i);
+ idx[1] = GET_ELT(i + 1);
+ idx[2] = GET_ELT(i + 2);
+ idx[3] = GET_ELT(i + 3);
+ idx[4] = GET_ELT(i + 4);
+ idx[5] = GET_ELT(i + 5);
+ TRIANGLE_ADJ(flags, idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ if (count >= 6) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ idx[0] = GET_ELT(1);
+ idx[2] = GET_ELT(0);
+ idx[4] = GET_ELT(2);
+ idx[3] = GET_ELT(4);
+
+ /*
+ * The vertices of the i-th triangle are stored in
+ * idx[0,2,4] = { 2*i, 2*i+2, 2*i+4 };
+ *
+ * The adjacent vertices are stored in
+ * idx[1,3,5] = { 2*i-2, 2*i+6, 2*i+3 }.
+ *
+ * However, there are two exceptions:
+ *
+ * For the first triangle, idx[1] = 1;
+ * For the last triangle, idx[3] = 2*i+5.
+ */
+ if (last_vertex_last) {
+ for (i = 0; i + 5 < count; i += 2) {
+ idx[1] = idx[0];
+
+ idx[0] = idx[2];
+ idx[2] = idx[4];
+ idx[4] = idx[3];
+
+ idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5));
+ idx[5] = GET_ELT(i + 3);
+
+ /*
+ * alternate the first two vertices (idx[0] and idx[2]) and the
+ * corresponding adjacent vertices (idx[3] and idx[5]) to have
+ * the correct orientation
+ */
+ if (i & 2) {
+ TRIANGLE_ADJ(flags,
+ idx[2], idx[1], idx[0], idx[5], idx[4], idx[3]);
+ }
+ else {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ }
+ }
+ else {
+ for (i = 0; i + 5 < count; i += 2) {
+ idx[1] = idx[0];
+
+ idx[0] = idx[2];
+ idx[2] = idx[4];
+ idx[4] = idx[3];
+
+ idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5));
+ idx[5] = GET_ELT(i + 3);
+
+ /*
+ * alternate the last two vertices (idx[2] and idx[4]) and the
+ * corresponding adjacent vertices (idx[1] and idx[5]) to have
+ * the correct orientation
+ */
+ if (i & 2) {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[5], idx[4], idx[3], idx[2], idx[1]);
+ }
+ else {
+ TRIANGLE_ADJ(flags,
+ idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ FUNC_EXIT;
+}
+
+#undef LOCAL_VARS
+#undef FUNC_ENTER
+#undef FUNC_EXIT
+#undef LINE_ADJ
+#undef TRIANGLE_ADJ
+
+#undef FUNC
+#undef FUNC_VARS
+#undef GET_ELT
+#undef POINT
+#undef LINE
+#undef TRIANGLE
diff --git a/src/gallium/auxiliary/draw/draw_fs.c b/src/gallium/auxiliary/draw/draw_fs.c
new file mode 100644
index 00000000000..1543bd86f17
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_fs.c
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "draw_fs.h"
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+struct draw_fragment_shader *
+draw_create_fragment_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ struct draw_fragment_shader *dfs;
+
+ dfs = CALLOC_STRUCT(draw_fragment_shader);
+ if (dfs) {
+ dfs->base = *shader;
+ tgsi_scan_shader(shader->tokens, &dfs->info);
+ }
+
+ return dfs;
+}
+
+
+void
+draw_bind_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dfs)
+{
+ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+
+ draw->fs.fragment_shader = dfs;
+}
+
+
+void
+draw_delete_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dfs)
+{
+ FREE(dfs);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_fs.h b/src/gallium/auxiliary/draw/draw_fs.h
new file mode 100644
index 00000000000..44995b8277f
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_fs.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_FS_H
+#define DRAW_FS_H
+
+
+#include "tgsi/tgsi_scan.h"
+
+
+struct draw_fragment_shader
+{
+ struct pipe_shader_state base;
+ struct tgsi_shader_info info;
+};
+
+
+#endif /* DRAW_FS_H */
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 131deed43e4..50a03ac95a5 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMWare Inc.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -37,8 +37,8 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
-#define MAX_PRIM_VERTICES 6
/* fixme: move it from here */
#define MAX_PRIMITIVES 64
@@ -75,6 +75,10 @@ draw_gs_set_constants(struct draw_context *draw,
const void *constants,
unsigned size)
{
+ /* noop. added here for symmetry with the VS
+ * code and in case we'll ever want to allign
+ * the constants, e.g. when we'll change to a
+ * different interpreter */
}
@@ -90,6 +94,7 @@ draw_create_geometry_shader(struct draw_context *draw,
if (!gs)
return NULL;
+ gs->draw = draw;
gs->state = *state;
gs->state.tokens = tgsi_dup_tokens(state->tokens);
if (!gs->state.tokens) {
@@ -112,7 +117,7 @@ draw_create_geometry_shader(struct draw_context *draw,
TGSI_PROPERTY_GS_OUTPUT_PRIM)
gs->output_primitive = gs->info.properties[i].data[0];
else if (gs->info.properties[i].name ==
- TGSI_PROPERTY_GS_MAX_VERTICES)
+ TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
gs->max_output_vertices = gs->info.properties[i].data[0];
}
@@ -154,128 +159,37 @@ void draw_delete_geometry_shader(struct draw_context *draw,
FREE(dgs);
}
-static INLINE int num_vertices_for_prim(int prim)
-{
- switch(prim) {
- case PIPE_PRIM_POINTS:
- return 1;
- case PIPE_PRIM_LINES:
- return 2;
- case PIPE_PRIM_LINE_LOOP:
- return 2;
- case PIPE_PRIM_LINE_STRIP:
- return 2;
- case PIPE_PRIM_TRIANGLES:
- return 3;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return 3;
- case PIPE_PRIM_TRIANGLE_FAN:
- return 3;
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- return 4;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return 6;
- default:
- assert(!"Bad geometry shader input");
- return 0;
- }
-}
-
-static void draw_fetch_geometry_input(struct draw_geometry_shader *shader,
- int start_primitive,
- int num_primitives,
- const float (*input_ptr)[4],
- unsigned input_vertex_stride,
- unsigned inputs_from_vs)
-{
- struct tgsi_exec_machine *machine = shader->machine;
- unsigned slot, vs_slot, k, j;
- unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
- int idx = 0;
-
- for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) {
- /*debug_printf("Slot = %d (semantic = %d)\n", slot,
- shader->info.input_semantic_name[slot]);*/
- if (shader->info.input_semantic_name[slot] ==
- TGSI_SEMANTIC_PRIMID) {
- for (j = 0; j < num_primitives; ++j) {
- machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j;
- machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j;
- machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j;
- machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j;
- }
- ++idx;
- } else {
- for (j = 0; j < num_primitives; ++j) {
- int vidx = idx;
- const float (*prim_ptr)[4];
- /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j,
- num_vertices);*/
- prim_ptr = (const float (*)[4])(
- (const char *)input_ptr +
- (j * num_vertices * input_vertex_stride));
-
- for (k = 0; k < num_vertices; ++k, ++vidx) {
- const float (*input)[4];
- input = (const float (*)[4])(
- (const char *)prim_ptr + (k * input_vertex_stride));
- vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
- /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/
-#if 1
- assert(!util_is_inf_or_nan(input[vs_slot][0]));
- assert(!util_is_inf_or_nan(input[vs_slot][1]));
- assert(!util_is_inf_or_nan(input[vs_slot][2]));
- assert(!util_is_inf_or_nan(input[vs_slot][3]));
-#endif
- machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0];
- machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1];
- machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2];
- machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3];
-#if 0
- debug_printf("\t\t%d %f %f %f %f\n", slot,
- machine->Inputs[vidx].xyzw[0].f[j],
- machine->Inputs[vidx].xyzw[1].f[j],
- machine->Inputs[vidx].xyzw[2].f[j],
- machine->Inputs[vidx].xyzw[3].f[j]);
-#endif
- }
- }
- ++vs_slot;
- idx += num_vertices;
- }
- }
-}
-
+/*#define DEBUG_OUTPUTS 1*/
static INLINE void
draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
int num_primitives,
- float (*output)[4],
- unsigned vertex_size)
+ float (**p_output)[4])
{
struct tgsi_exec_machine *machine = shader->machine;
unsigned prim_idx, j, slot;
+ float (*output)[4];
+
+ output = *p_output;
/* Unswizzle all output results.
*/
- /* FIXME: handle all the primitives produced by the gs, not just
- * the first one
- unsigned prim_count =
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/
+
for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
- unsigned num_verts_per_prim = machine->Primitives[0];
+ unsigned num_verts_per_prim = machine->Primitives[prim_idx];
+ shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
+ machine->Primitives[prim_idx];
+ shader->emitted_vertices += num_verts_per_prim;
for (j = 0; j < num_verts_per_prim; j++) {
int idx = (prim_idx * num_verts_per_prim + j) *
shader->info.num_outputs;
#ifdef DEBUG_OUTPUTS
- debug_printf("%d) Output vert:\n", idx);
+ debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
#endif
for (slot = 0; slot < shader->info.num_outputs; slot++) {
- output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx];
- output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx];
- output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx];
- output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx];
+ output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
+ output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
+ output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
+ output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
#ifdef DEBUG_OUTPUTS
debug_printf("\t%d: %f %f %f %f\n", slot,
output[slot][0],
@@ -285,52 +199,276 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
#endif
debug_assert(!util_is_inf_or_nan(output[slot][0]));
}
- output = (float (*)[4])((char *)output + vertex_size);
+ output = (float (*)[4])((char *)output + shader->vertex_size);
}
}
+ *p_output = output;
+ shader->emitted_primitives += num_primitives;
}
-void draw_geometry_shader_run(struct draw_geometry_shader *shader,
- const float (*input)[4],
- float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
- unsigned count,
- unsigned input_stride,
- unsigned vertex_size)
+/*#define DEBUG_INPUTS 1*/
+static void draw_fetch_gs_input(struct draw_geometry_shader *shader,
+ unsigned *indices,
+ unsigned num_vertices,
+ unsigned prim_idx)
{
struct tgsi_exec_machine *machine = shader->machine;
- unsigned int i;
- unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
- unsigned num_primitives = count/num_vertices;
- unsigned inputs_from_vs = 0;
+ unsigned slot, vs_slot, i;
+ unsigned input_vertex_stride = shader->input_vertex_stride;
+ const float (*input_ptr)[4];
- for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- machine->Consts[i] = constants[i];
- }
+ input_ptr = shader->input;
- for (i = 0; i < shader->info.num_inputs; ++i) {
- if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID)
- ++inputs_from_vs;
+ for (i = 0; i < num_vertices; ++i) {
+ const float (*input)[4];
+#if DEBUG_INPUTS
+ debug_printf("%d) vertex index = %d (prim idx = %d)\n",
+ i, indices[i], prim_idx);
+#endif
+ input = (const float (*)[4])(
+ (const char *)input_ptr + (indices[i] * input_vertex_stride));
+ for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
+ unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
+ if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
+ machine->Inputs[idx].xyzw[0].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ machine->Inputs[idx].xyzw[1].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ machine->Inputs[idx].xyzw[2].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ machine->Inputs[idx].xyzw[3].f[prim_idx] =
+ (float)shader->in_prim_idx;
+ } else {
+#if DEBUG_INPUTS
+ debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+ slot, vs_slot, idx);
+#endif
+#if 1
+ assert(!util_is_inf_or_nan(input[vs_slot][0]));
+ assert(!util_is_inf_or_nan(input[vs_slot][1]));
+ assert(!util_is_inf_or_nan(input[vs_slot][2]));
+ assert(!util_is_inf_or_nan(input[vs_slot][3]));
+#endif
+ machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
+ machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
+ machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
+ machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
+#if DEBUG_INPUTS
+ debug_printf("\t\t%f %f %f %f\n",
+ machine->Inputs[idx].xyzw[0].f[prim_idx],
+ machine->Inputs[idx].xyzw[1].f[prim_idx],
+ machine->Inputs[idx].xyzw[2].f[prim_idx],
+ machine->Inputs[idx].xyzw[3].f[prim_idx]);
+#endif
+ ++vs_slot;
+ }
+ }
}
+}
- for (i = 0; i < num_primitives; ++i) {
- unsigned int max_primitives = 1;
+static void gs_flush(struct draw_geometry_shader *shader,
+ unsigned input_primitives)
+{
+ unsigned out_prim_count;
+ struct tgsi_exec_machine *machine = shader->machine;
+
+ debug_assert(input_primitives > 0 &&
+ input_primitives < 4);
- draw_fetch_geometry_input(shader, i, max_primitives, input,
- input_stride, inputs_from_vs);
+ tgsi_set_exec_mask(machine,
+ 1,
+ input_primitives > 1,
+ input_primitives > 2,
+ input_primitives > 3);
- tgsi_set_exec_mask(machine,
- 1,
- max_primitives > 1,
- max_primitives > 2,
- max_primitives > 3);
+ /* run interpreter */
+ tgsi_exec_machine_run(machine);
- /* run interpreter */
- tgsi_exec_machine_run(machine);
+ out_prim_count =
+ machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
+
+#if 0
+ debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
+ shader->emitted_primitives, shader->emitted_vertices,
+ out_prim_count);
+#endif
+ draw_geometry_fetch_outputs(shader, out_prim_count,
+ &shader->tmp_output);
+}
+
+static void gs_point(struct draw_geometry_shader *shader,
+ int idx)
+{
+ unsigned indices[1];
+
+ indices[0] = idx;
+
+ draw_fetch_gs_input(shader, indices, 1, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_line(struct draw_geometry_shader *shader,
+ int i0, int i1)
+{
+ unsigned indices[2];
- draw_geometry_fetch_outputs(shader, max_primitives,
- output, vertex_size);
+ indices[0] = i0;
+ indices[1] = i1;
+
+ draw_fetch_gs_input(shader, indices, 2, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_line_adj(struct draw_geometry_shader *shader,
+ int i0, int i1, int i2, int i3)
+{
+ unsigned indices[4];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+ indices[3] = i3;
+
+ draw_fetch_gs_input(shader, indices, 4, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_tri(struct draw_geometry_shader *shader,
+ int i0, int i1, int i2)
+{
+ unsigned indices[3];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+
+ draw_fetch_gs_input(shader, indices, 3, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+static void gs_tri_adj(struct draw_geometry_shader *shader,
+ int i0, int i1, int i2,
+ int i3, int i4, int i5)
+{
+ unsigned indices[6];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+ indices[3] = i3;
+ indices[4] = i4;
+ indices[5] = i5;
+
+ draw_fetch_gs_input(shader, indices, 6, 0);
+ ++shader->in_prim_idx;
+
+ gs_flush(shader, 1);
+}
+
+#define FUNC gs_run
+#define GET_ELT(idx) (idx)
+#include "draw_gs_tmp.h"
+
+
+#define FUNC gs_run_elts
+#define LOCAL_VARS const ushort *elts = input_prims->elts;
+#define GET_ELT(idx) (elts[idx])
+#include "draw_gs_tmp.h"
+
+
+/**
+ * Execute geometry shader using TGSI interpreter.
+ */
+int draw_geometry_shader_run(struct draw_geometry_shader *shader,
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const struct draw_vertex_info *input_verts,
+ const struct draw_prim_info *input_prim,
+ struct draw_vertex_info *output_verts,
+ struct draw_prim_info *output_prims )
+{
+ const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
+ unsigned input_stride = input_verts->vertex_size;
+ unsigned vertex_size = input_verts->vertex_size;
+ struct tgsi_exec_machine *machine = shader->machine;
+ unsigned num_input_verts = input_prim->linear ?
+ input_verts->count :
+ input_prim->count;
+ unsigned num_in_primitives =
+ MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
+ u_gs_prims_for_vertices(shader->input_primitive, num_input_verts));
+ unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive,
+ shader->max_output_vertices)
+ * num_in_primitives;
+
+ output_verts->vertex_size = input_verts->vertex_size;
+ output_verts->stride = input_verts->vertex_size;
+ output_verts->verts =
+ (struct vertex_header *)MALLOC(input_verts->vertex_size *
+ num_in_primitives *
+ shader->max_output_vertices);
+
+
+#if 0
+ debug_printf("%s count = %d (in prims # = %d)\n",
+ __FUNCTION__, num_input_verts, num_in_primitives);
+ debug_printf("\tlinear = %d, prim_info->count = %d\n",
+ input_prim->linear, input_prim->count);
+ debug_printf("\tprimt pipe = %d, shader in = %d, shader out = %d, max out = %d\n",
+ input_prim->prim, shader->input_primitive,
+ shader->output_primitive,
+ shader->max_output_vertices);
+#endif
+
+ shader->emitted_vertices = 0;
+ shader->emitted_primitives = 0;
+ shader->vertex_size = vertex_size;
+ shader->tmp_output = (float (*)[4])output_verts->verts->data;
+ shader->in_prim_idx = 0;
+ shader->input_vertex_stride = input_stride;
+ shader->input = input;
+ if (shader->primitive_lengths) {
+ FREE(shader->primitive_lengths);
}
+ shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
+
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, constants_size);
+
+ if (input_prim->linear)
+ gs_run(shader, input_prim, input_verts,
+ output_prims, output_verts);
+ else
+ gs_run_elts(shader, input_prim, input_verts,
+ output_prims, output_verts);
+
+ /* Update prim_info:
+ */
+ output_prims->linear = TRUE;
+ output_prims->elts = NULL;
+ output_prims->start = 0;
+ output_prims->count = shader->emitted_vertices;
+ output_prims->prim = shader->output_primitive;
+ output_prims->flags = 0x0;
+ output_prims->primitive_lengths = shader->primitive_lengths;
+ output_prims->primitive_count = shader->emitted_primitives;
+ output_verts->count = shader->emitted_vertices;
+
+#if 0
+ debug_printf("GS finished, prims = %d, verts = %d\n",
+ output_prims->primitive_count,
+ output_verts->count);
+#endif
+
+ return shader->emitted_vertices;
}
void draw_geometry_shader_delete(struct draw_geometry_shader *shader)
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
index d8eb2103433..67bc1aa73ff 100644
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2009 VMWare Inc.
+ * Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -54,23 +54,37 @@ struct draw_geometry_shader {
unsigned input_primitive;
unsigned output_primitive;
- /* Extracted from shader:
- */
- const float (*immediates)[4];
+ unsigned *primitive_lengths;
+ unsigned emitted_vertices;
+ unsigned emitted_primitives;
+
+ float (*tmp_output)[4];
+ unsigned vertex_size;
+
+ unsigned in_prim_idx;
+ unsigned input_vertex_stride;
+ const float (*input)[4];
};
-void draw_geometry_shader_run(struct draw_geometry_shader *shader,
- const float (*input)[4],
- float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
- unsigned count,
- unsigned input_stride,
- unsigned output_stride);
+/*
+ * Returns the number of vertices emitted.
+ * The vertex shader can emit any number of vertices as long as it's
+ * smaller than the GS_MAX_OUTPUT_VERTICES shader property.
+ */
+int draw_geometry_shader_run(struct draw_geometry_shader *shader,
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const struct draw_vertex_info *input_verts,
+ const struct draw_prim_info *input_prim,
+ struct draw_vertex_info *output_verts,
+ struct draw_prim_info *output_prims );
void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
struct draw_context *draw);
void draw_geometry_shader_delete(struct draw_geometry_shader *shader);
+int draw_gs_max_output_vertices(struct draw_geometry_shader *shader,
+ unsigned pipe_prim);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
new file mode 100644
index 00000000000..de7b02655a5
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -0,0 +1,32 @@
+#define FUNC_VARS struct draw_geometry_shader *gs, \
+ const struct draw_prim_info *input_prims, \
+ const struct draw_vertex_info *input_verts, \
+ struct draw_prim_info *output_prims, \
+ struct draw_vertex_info *output_verts
+
+#define FUNC_ENTER \
+ /* declare more local vars */ \
+ const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
+ const unsigned count = input_prims->count; \
+ const boolean last_vertex_last = TRUE; \
+ do { \
+ debug_assert(input_prims->primitive_count == 1); \
+ switch (prim) { \
+ case PIPE_PRIM_QUADS: \
+ case PIPE_PRIM_QUAD_STRIP: \
+ case PIPE_PRIM_POLYGON: \
+ debug_assert(!"unexpected primitive type in GS"); \
+ return; \
+ default: \
+ break; \
+ } \
+ } while (0) \
+
+#define POINT(i0) gs_point(gs,i0)
+#define LINE(flags,i0,i1) gs_line(gs,i0,i1)
+#define TRIANGLE(flags,i0,i1,i2) gs_tri(gs,i0,i1,i2)
+#define LINE_ADJ(flags,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3)
+#define TRIANGLE_ADJ(flags,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5)
+
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 27383221b9b..7fb86d7cb27 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1,3 +1,30 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "draw_llvm.h"
#include "draw_context.h"
@@ -10,17 +37,21 @@
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_tgsi.h"
#include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_init.h"
#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
+#include "util/u_pointer.h"
#include "util/u_string.h"
#include <llvm-c/Transforms/Scalar.h>
#define DEBUG_STORE 0
-
/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
@@ -34,12 +65,24 @@ init_globals(struct draw_llvm *llvm)
/* struct draw_jit_texture */
{
- LLVMTypeRef elem_types[4];
+ LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
- elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
- elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+ elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_DATA] =
+ LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
+ DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
+ LLVMArrayType(LLVMFloatType(), 4);
texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -49,12 +92,33 @@ init_globals(struct draw_llvm *llvm)
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_HEIGHT);
- LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_DEPTH);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_LAST_LEVEL);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_ROW_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
llvm->target, texture_type,
- DRAW_JIT_TEXTURE_STRIDE);
+ DRAW_JIT_TEXTURE_IMG_STRIDE);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_DATA);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_MIN_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_MAX_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_LOD_BIAS);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_BORDER_COLOR);
LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
llvm->target, texture_type);
@@ -69,7 +133,8 @@ init_globals(struct draw_llvm *llvm)
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
- elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+ elem_types[2] = LLVMArrayType(texture_type,
+ PIPE_MAX_VERTEX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -79,7 +144,7 @@ init_globals(struct draw_llvm *llvm)
llvm->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
llvm->target, context_type,
- DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+ DRAW_JIT_CTX_TEXTURES);
LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
llvm->target, context_type);
@@ -161,9 +226,11 @@ create_vertex_header(struct draw_llvm *llvm, int data_elems)
struct draw_llvm *
draw_llvm_create(struct draw_context *draw)
{
- struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+ struct draw_llvm *llvm;
- util_cpu_detect();
+ llvm = CALLOC_STRUCT( draw_llvm );
+ if (!llvm)
+ return NULL;
llvm->draw = draw;
llvm->engine = draw->engine;
@@ -179,27 +246,50 @@ draw_llvm_create(struct draw_context *draw)
llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
LLVMAddTargetData(llvm->target, llvm->pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- /* TODO: Add more passes */
- LLVMAddConstantPropagationPass(llvm->pass);
- if(util_cpu_caps.has_sse4_1) {
- /* FIXME: There is a bug in this pass, whereby the combination of fptosi
- * and sitofp (necessary for trunc/floor/ceil/round implementation)
- * somehow becomes invalid code.
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+
+ LLVMAddCFGSimplificationPass(llvm->pass);
+
+ if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
+ /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
+ * avoid generating bad code.
+ * Test with piglit glsl-vs-sqrt-zero test.
+ */
+ LLVMAddConstantPropagationPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ }
+ else {
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ LLVMAddConstantPropagationPass(llvm->pass);
+ }
+
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(llvm->pass);
+ }
+ LLVMAddGVNPass(llvm->pass);
+ } else {
+ /* We need at least this pass to prevent the backends to fail in
+ * unexpected ways.
*/
- LLVMAddInstructionCombiningPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
}
- LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
- LLVMAddGVNPass(llvm->pass);
- LLVMAddCFGSimplificationPass(llvm->pass);
init_globals(llvm);
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ LLVMDumpModule(llvm->module);
+ }
-#if 0
- LLVMDumpModule(llvm->module);
-#endif
+ llvm->nr_variants = 0;
+ make_empty_list(&llvm->vs_variants_list);
return llvm;
}
@@ -207,21 +297,41 @@ draw_llvm_create(struct draw_context *draw)
void
draw_llvm_destroy(struct draw_llvm *llvm)
{
+ LLVMDisposePassManager(llvm->pass);
+
FREE(llvm);
}
struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_inputs,
+ const struct draw_llvm_variant_key *key)
{
- struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+ struct draw_llvm_variant *variant;
+ struct llvm_vertex_shader *shader =
+ llvm_vertex_shader(llvm->draw->vs.vertex_shader);
- draw_llvm_make_variant_key(llvm, &variant->key);
+ variant = MALLOC(sizeof *variant +
+ shader->variant_key_size -
+ sizeof variant->key);
+ if (variant == NULL)
+ return NULL;
+
+ variant->llvm = llvm;
+
+ memcpy(&variant->key, key, shader->variant_key_size);
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
draw_llvm_generate(llvm, variant);
draw_llvm_generate_elts(llvm, variant);
+ variant->shader = shader;
+ variant->list_item_global.base = variant;
+ variant->list_item_local.base = variant;
+ /*variant->no = */shader->variants_created++;
+ variant->list_item_global.base = variant;
+
return variant;
}
@@ -230,11 +340,13 @@ generate_vs(struct draw_llvm *llvm,
LLVMBuilderRef builder,
LLVMValueRef (*outputs)[NUM_CHANNELS],
const LLVMValueRef (*inputs)[NUM_CHANNELS],
- LLVMValueRef context_ptr)
+ LLVMValueRef context_ptr,
+ struct lp_build_sampler_soa *draw_sampler)
{
const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
struct lp_type vs_type;
LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
+ struct lp_build_sampler_soa *sampler = 0;
memset(&vs_type, 0, sizeof vs_type);
vs_type.floating = TRUE; /* floating point values */
@@ -246,7 +358,14 @@ generate_vs(struct draw_llvm *llvm,
num_vs = 4; /* number of vertices per block */
#endif
- /*tgsi_dump(tokens, 0);*/
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ tgsi_dump(tokens, 0);
+ }
+
+ if (llvm->draw->num_sampler_views &&
+ llvm->draw->num_samplers)
+ sampler = draw_sampler;
+
lp_build_tgsi_soa(builder,
tokens,
vs_type,
@@ -255,7 +374,7 @@ generate_vs(struct draw_llvm *llvm,
NULL /*pos*/,
inputs,
outputs,
- NULL/*sampler*/,
+ sampler,
&llvm->draw->vs.vertex_shader->info);
}
@@ -283,7 +402,8 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef *res,
struct pipe_vertex_element *velem,
LLVMValueRef vbuf,
- LLVMValueRef index)
+ LLVMValueRef index,
+ LLVMValueRef instance_id)
{
LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
@@ -294,8 +414,15 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef cond;
LLVMValueRef stride;
- cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+ if (velem->instance_divisor) {
+ /* array index = instance_id / instance_divisor */
+ index = LLVMBuildUDiv(builder, instance_id,
+ LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
+ "instance_divisor");
+ }
+ /* limit index to min(inex, vb_max_index) */
+ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
stride = LLVMBuildMul(builder, vb_stride, index, "");
@@ -563,20 +690,22 @@ convert_to_aos(LLVMBuilderRef builder,
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMTypeRef func_type;
LLVMValueRef context_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef start, end, count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ LLVMValueRef instance_id;
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ void *code;
+ struct lp_build_sampler_soa *sampler = 0;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
@@ -585,6 +714,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
arg_types[4] = LLVMInt32Type(); /* count */
arg_types[5] = LLVMInt32Type(); /* stride */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+ arg_types[7] = LLVMInt32Type(); /* instance_id */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -601,6 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
count = LLVMGetParam(variant->function, 4);
stride = LLVMGetParam(variant->function, 5);
vb_ptr = LLVMGetParam(variant->function, 6);
+ instance_id = LLVMGetParam(variant->function, 7);
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -609,6 +740,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
lp_build_name(count, "count");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
+ lp_build_name(instance_id, "instance_id");
/*
* Function body
@@ -618,12 +750,17 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld, builder, lp_type_int(32));
end = lp_build_add(&bld, start, count);
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* code generated texture sampling */
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
+
#if DEBUG_STORE
lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
start, end, step);
@@ -654,7 +791,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
&vb_index, 1, "");
generate_fetch(builder, vbuffers_ptr,
- &aos_attribs[j][i], velem, vb, true_index);
+ &aos_attribs[j][i], velem, vb, true_index,
+ instance_id);
}
}
convert_to_soa(builder, aos_attribs, inputs,
@@ -665,7 +803,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder,
outputs,
ptr_aos,
- context_ptr);
+ context_ptr,
+ sampler);
convert_to_aos(builder, io, outputs,
draw->vs.vertex_shader->info.num_outputs,
@@ -673,6 +812,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
}
lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
+ sampler->destroy(sampler);
+
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -682,41 +828,48 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
+ lp_debug_dump_value(variant->function);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function);
- if (0) {
- LLVMDumpValue(variant->function);
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ lp_debug_dump_value(variant->function);
debug_printf("\n");
}
- variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
- if (0)
- lp_disassemble(variant->jit_func);
+ code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
+ variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
+
+ if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ lp_disassemble(code);
+ }
+ lp_func_delete_body(variant->function);
}
static void
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMTypeRef func_type;
LLVMValueRef context_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ LLVMValueRef instance_id;
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
struct lp_build_loop_state lp_loop;
- struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef fetch_max;
+ void *code;
+ struct lp_build_sampler_soa *sampler = 0;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
@@ -725,14 +878,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
arg_types[4] = LLVMInt32Type(); /* fetch_count */
arg_types[5] = LLVMInt32Type(); /* stride */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+ arg_types[7] = LLVMInt32Type(); /* instance_id */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
- variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts",
+ func_type);
LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
for(i = 0; i < Elements(arg_types); ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
+ LLVMNoAliasAttribute);
context_ptr = LLVMGetParam(variant->function_elts, 0);
io_ptr = LLVMGetParam(variant->function_elts, 1);
@@ -741,6 +897,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
fetch_count = LLVMGetParam(variant->function_elts, 4);
stride = LLVMGetParam(variant->function_elts, 5);
vb_ptr = LLVMGetParam(variant->function_elts, 6);
+ instance_id = LLVMGetParam(variant->function_elts, 7);
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -749,6 +906,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_build_name(fetch_count, "fetch_count");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
+ lp_build_name(instance_id, "instance_id");
/*
* Function body
@@ -758,10 +916,19 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld, builder, lp_type_int(32));
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* code generated texture sampling */
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
+
+ fetch_max = LLVMBuildSub(builder, fetch_count,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "fetch_max");
+
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
{
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
@@ -780,8 +947,15 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder,
lp_loop.counter,
LLVMConstInt(LLVMInt32Type(), i, 0), "");
- LLVMValueRef fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
- &true_index, 1, "");
+ LLVMValueRef fetch_ptr;
+
+ /* make sure we're not out of bounds which can happen
+ * if fetch_count % 4 != 0, because on the last iteration
+ * a few of the 4 vertex fetches will be out of bounds */
+ true_index = lp_build_min(&bld, true_index, fetch_max);
+
+ fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
@@ -791,7 +965,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
&vb_index, 1, "");
generate_fetch(builder, vbuffers_ptr,
- &aos_attribs[j][i], velem, vb, true_index);
+ &aos_attribs[j][i], velem, vb, true_index,
+ instance_id);
}
}
convert_to_soa(builder, aos_attribs, inputs,
@@ -802,7 +977,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder,
outputs,
ptr_aos,
- context_ptr);
+ context_ptr,
+ sampler);
convert_to_aos(builder, io, outputs,
draw->vs.vertex_shader->info.num_outputs,
@@ -810,6 +986,13 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
}
lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+ sampler->destroy(sampler);
+
+#ifdef PIPE_ARCH_X86
+ /* Avoid corrupting the FPU stack on 32bit OSes. */
+ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -819,37 +1002,136 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function_elts);
+ lp_debug_dump_value(variant->function_elts);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
- if (0) {
- LLVMDumpValue(variant->function_elts);
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ lp_debug_dump_value(variant->function_elts);
debug_printf("\n");
}
- variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(
- llvm->draw->engine, variant->function_elts);
- if (0)
- lp_disassemble(variant->jit_func_elts);
+ code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
+ variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
+
+ if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ lp_disassemble(code);
+ }
+ lp_func_delete_body(variant->function_elts);
}
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key)
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
{
- memset(key, 0, sizeof(struct draw_llvm_variant_key));
+ unsigned i;
+ struct draw_llvm_variant_key *key;
+ struct lp_sampler_static_state *sampler;
+
+ key = (struct draw_llvm_variant_key *)store;
+ /* Presumably all variants of the shader should have the same
+ * number of vertex elements - ie the number of shader inputs.
+ */
key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
+ /* All variants of this shader will have the same value for
+ * nr_samplers. Not yet trying to compact away holes in the
+ * sampler array.
+ */
+ key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ sampler = draw_llvm_variant_key_samplers(key);
+
memcpy(key->vertex_element,
llvm->draw->pt.vertex_element,
sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+
+ memset(sampler, 0, key->nr_samplers * sizeof *sampler);
+
+ for (i = 0 ; i < key->nr_samplers; i++) {
+ lp_sampler_static_state(&sampler[i],
+ llvm->draw->sampler_views[i],
+ llvm->draw->samplers[i]);
+ }
+
+ return key;
+}
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+ unsigned j;
+ struct draw_jit_texture *jit_tex;
+
+ assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
+
+
+ jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
+
+ jit_tex->width = width;
+ jit_tex->height = height;
+ jit_tex->depth = depth;
+ jit_tex->last_level = last_level;
+
+ for (j = 0; j <= last_level; j++) {
+ jit_tex->data[j] = data[j];
+ jit_tex->row_stride[j] = row_stride[j];
+ jit_tex->img_stride[j] = img_stride[j];
+ }
+}
+
+
+void
+draw_llvm_set_sampler_state(struct draw_context *draw)
+{
+ unsigned i;
+
+ for (i = 0; i < draw->num_samplers; i++) {
+ struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
+
+ if (draw->samplers[i]) {
+ jit_tex->min_lod = draw->samplers[i]->min_lod;
+ jit_tex->max_lod = draw->samplers[i]->max_lod;
+ jit_tex->lod_bias = draw->samplers[i]->lod_bias;
+ COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
+ }
+ }
+}
+
+
+void
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
+{
+ struct draw_llvm *llvm = variant->llvm;
+ struct draw_context *draw = llvm->draw;
+
+ if (variant->function_elts) {
+ if (variant->function_elts)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function_elts);
+ LLVMDeleteFunction(variant->function_elts);
+ }
+
+ if (variant->function) {
+ if (variant->function)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function);
+ LLVMDeleteFunction(variant->function);
+ }
- memcpy(&key->vs,
- &llvm->draw->vs.vertex_shader->state,
- sizeof(struct pipe_shader_state));
+ remove_from_list(&variant->list_item_local);
+ variant->shader->variants_cached--;
+ remove_from_list(&variant->list_item_global);
+ llvm->nr_variants--;
+ FREE(variant);
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 58fee7f9d60..d0a68ae412d 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -1,28 +1,79 @@
-#ifndef HAVE_LLVM_H
-#define HAVE_LLVM_H
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_LLVM_H
+#define DRAW_LLVM_H
#include "draw/draw_private.h"
+#include "draw/draw_vs.h"
+#include "gallivm/lp_bld_sample.h"
+
#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include <llvm-c/Target.h>
#include <llvm-c/ExecutionEngine.h>
+#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
+
+struct draw_llvm;
+struct llvm_vertex_shader;
+
struct draw_jit_texture
{
uint32_t width;
uint32_t height;
- uint32_t stride;
- const void *data;
+ uint32_t depth;
+ uint32_t last_level;
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
+ const void *data[DRAW_MAX_TEXTURE_LEVELS];
+ float min_lod;
+ float max_lod;
+ float lod_bias;
+ float border_color[4];
};
enum {
DRAW_JIT_TEXTURE_WIDTH = 0,
DRAW_JIT_TEXTURE_HEIGHT,
- DRAW_JIT_TEXTURE_STRIDE,
- DRAW_JIT_TEXTURE_DATA
+ DRAW_JIT_TEXTURE_DEPTH,
+ DRAW_JIT_TEXTURE_LAST_LEVEL,
+ DRAW_JIT_TEXTURE_ROW_STRIDE,
+ DRAW_JIT_TEXTURE_IMG_STRIDE,
+ DRAW_JIT_TEXTURE_DATA,
+ DRAW_JIT_TEXTURE_MIN_LOD,
+ DRAW_JIT_TEXTURE_MAX_LOD,
+ DRAW_JIT_TEXTURE_LOD_BIAS,
+ DRAW_JIT_TEXTURE_BORDER_COLOR,
+ DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
};
enum {
@@ -48,7 +99,7 @@ struct draw_jit_context
const float *gs_constants;
- struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+ struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS];
};
@@ -58,10 +109,10 @@ struct draw_jit_context
#define draw_jit_context_gs_constants(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 1, "gs_constants")
-#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2
+#define DRAW_JIT_CTX_TEXTURES 2
#define draw_jit_context_textures(_builder, _ptr) \
- lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+ lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures")
@@ -92,7 +143,8 @@ typedef void
unsigned start,
unsigned count,
unsigned stride,
- struct pipe_vertex_buffer *vertex_buffers);
+ struct pipe_vertex_buffer *vertex_buffers,
+ unsigned instance_id);
typedef void
@@ -102,13 +154,89 @@ typedef void
const unsigned *fetch_elts,
unsigned fetch_count,
unsigned stride,
- struct pipe_vertex_buffer *vertex_buffers);
+ struct pipe_vertex_buffer *vertex_buffers,
+ unsigned instance_id);
+
+struct draw_llvm_variant_key
+{
+ unsigned nr_vertex_elements:16;
+ unsigned nr_samplers:16;
+
+ /* Variable number of vertex elements:
+ */
+ struct pipe_vertex_element vertex_element[1];
+
+ /* Followed by variable number of samplers:
+ */
+/* struct lp_sampler_static_state sampler; */
+};
+
+#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
+ (sizeof(struct draw_llvm_variant_key) + \
+ PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state) + \
+ (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
+
+
+static INLINE size_t
+draw_llvm_variant_key_size(unsigned nr_vertex_elements,
+ unsigned nr_samplers)
+{
+ return (sizeof(struct draw_llvm_variant_key) +
+ nr_samplers * sizeof(struct lp_sampler_static_state) +
+ (nr_vertex_elements - 1) * sizeof(struct pipe_vertex_element));
+}
+
+
+static INLINE struct lp_sampler_static_state *
+draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
+{
+ return (struct lp_sampler_static_state *)
+ &key->vertex_element[key->nr_vertex_elements];
+}
+
+
+
+struct draw_llvm_variant_list_item
+{
+ struct draw_llvm_variant *base;
+ struct draw_llvm_variant_list_item *next, *prev;
+};
+
+struct draw_llvm_variant
+{
+ LLVMValueRef function;
+ LLVMValueRef function_elts;
+ draw_jit_vert_func jit_func;
+ draw_jit_vert_func_elts jit_func_elts;
+
+ struct llvm_vertex_shader *shader;
+
+ struct draw_llvm *llvm;
+ struct draw_llvm_variant_list_item list_item_global;
+ struct draw_llvm_variant_list_item list_item_local;
+
+ /* key is variable-sized, must be last */
+ struct draw_llvm_variant_key key;
+ /* key is variable-sized, must be last */
+};
+
+struct llvm_vertex_shader {
+ struct draw_vertex_shader base;
+
+ unsigned variant_key_size;
+ struct draw_llvm_variant_list_item variants;
+ unsigned variants_created;
+ unsigned variants_cached;
+};
struct draw_llvm {
struct draw_context *draw;
struct draw_jit_context jit_context;
+ struct draw_llvm_variant_list_item vs_variants_list;
+ int nr_variants;
+
LLVMModuleRef module;
LLVMExecutionEngineRef engine;
LLVMModuleProviderRef provider;
@@ -121,23 +249,12 @@ struct draw_llvm {
LLVMTypeRef vb_ptr_type;
};
-struct draw_llvm_variant_key
-{
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_elements;
- struct pipe_shader_state vs;
-};
-
-struct draw_llvm_variant
+static INLINE struct llvm_vertex_shader *
+llvm_vertex_shader(struct draw_vertex_shader *vs)
{
- struct draw_llvm_variant_key key;
- LLVMValueRef function;
- LLVMValueRef function_elts;
- draw_jit_vert_func jit_func;
- draw_jit_vert_func_elts jit_func_elts;
+ return (struct llvm_vertex_shader *)vs;
+}
- struct draw_llvm_variant *next;
-};
struct draw_llvm *
draw_llvm_create(struct draw_context *draw);
@@ -146,14 +263,35 @@ void
draw_llvm_destroy(struct draw_llvm *llvm);
struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs);
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_vertex_header_attribs,
+ const struct draw_llvm_variant_key *key);
void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key);
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
LLVMValueRef
draw_llvm_translate_from(LLVMBuilderRef builder,
LLVMValueRef vbuffer,
enum pipe_format from_format);
+
+struct lp_build_sampler_soa *
+draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr);
+
+void
+draw_llvm_set_sampler_state(struct draw_context *draw);
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c
new file mode 100644
index 00000000000..ac1fbb179c6
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -0,0 +1,223 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ * @author Jose Fonseca <[email protected]>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_string.h"
+
+#include "draw_llvm.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in
+ * lp_jit_context and lp_jit_texture and the sampler code
+ * generator. It provides the texture layout information required by
+ * the texture sampler code generator in terms of the state stored in
+ * lp_jit_context and lp_jit_texture in runtime.
+ */
+struct draw_llvm_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct draw_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct draw_llvm_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ * \param emit_load if TRUE, emit the LLVM load instruction to actually
+ * fetch the field's value. Otherwise, just emit the
+ * GEP code to address the field.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name,
+ boolean emit_load)
+{
+ struct draw_llvm_sampler_dynamic_state *state =
+ (struct draw_llvm_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ debug_assert(unit < PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), DRAW_JIT_CTX_TEXTURES, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ if (emit_load)
+ res = LLVMBuildLoad(builder, ptr, "");
+ else
+ res = ptr;
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to
+ * fetch the members of lp_jit_texture to fulfill the sampler code
+ * generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture
+ * sampler code generator a reusable module without dependencies to
+ * llvmpipe internals.
+ */
+#define DRAW_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \
+ static LLVMValueRef \
+ draw_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return draw_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \
+ }
+
+
+DRAW_LLVM_TEXTURE_MEMBER(width, DRAW_JIT_TEXTURE_WIDTH, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(height, DRAW_JIT_TEXTURE_HEIGHT, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(depth, DRAW_JIT_TEXTURE_DEPTH, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(min_lod, DRAW_JIT_TEXTURE_MIN_LOD, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(max_lod, DRAW_JIT_TEXTURE_MAX_LOD, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(lod_bias, DRAW_JIT_TEXTURE_LOD_BIAS, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE)
+
+
+static void
+draw_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+/**
+ * Fetch filtered values from texture.
+ * The 'texel' parameter returns four vectors corresponding to R, G, B, A.
+ */
+static void
+draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef *texel)
+{
+ struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_VERTEX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct draw_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(draw_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = draw_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = draw_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = draw_llvm_texture_width;
+ sampler->dynamic_state.base.height = draw_llvm_texture_height;
+ sampler->dynamic_state.base.depth = draw_llvm_texture_depth;
+ sampler->dynamic_state.base.last_level = draw_llvm_texture_last_level;
+ sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride;
+ sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride;
+ sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr;
+ sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod;
+ sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod;
+ sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias;
+ sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
index d7da7ed357d..5171327ce2d 100644
--- a/src/gallium/auxiliary/draw/draw_llvm_translate.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c
@@ -3,10 +3,10 @@
#include "draw_llvm.h"
-#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_struct.h"
#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
#include "util/u_memory.h"
#include "util/u_format.h"
@@ -466,6 +466,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
const struct util_format_description *format_desc;
LLVMValueRef zero;
int i;
+ struct lp_type type = lp_float32_vec4_type();
/*
* The above can only cope with straight arrays: no bitfields,
@@ -493,5 +494,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
format_desc = util_format_description(from_format);
zero = LLVMConstNull(LLVMInt32Type());
- return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
+ return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero, zero);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 64c35025081..6206197dae9 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -170,58 +170,42 @@ static void do_triangle( struct draw_context *draw,
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
*/
-#define QUAD(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * elts[i0], \
- verts + stride * elts[i1], \
- verts + stride * elts[i3]); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * elts[i1], \
- verts + stride * elts[i2], \
- verts + stride * elts[i3])
-
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- elts[i0], /* flags */ \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) );
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- elts[i0], \
- verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) );
+
+#define TRIANGLE(flags,i0,i1,i2) \
+ do { \
+ do_triangle( draw, \
+ flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1), \
+ verts + stride * (i2) ); \
+ } while (0)
+
+#define LINE(flags,i0,i1) \
+ do { \
+ do_line( draw, \
+ flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1) ); \
+ } while (0)
#define POINT(i0) \
- do_point( draw, \
- verts + stride * elts[i0] )
+ do { \
+ do_point( draw, verts + stride * (i0) ); \
+ } while (0)
+
+#define GET_ELT(idx) (elts[idx])
-#define FUNC pipe_run
-#define ARGS \
+#define FUNC pipe_run_elts
+#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
- const ushort *elts
-
-#define LOCAL_VARS \
- char *verts = (char *)vertices; \
- boolean flatfirst = (draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
- unsigned i; \
- ushort flags
-
-#define FLUSH
+ const ushort *elts, \
+ unsigned count
#include "draw_pt_decompose.h"
-#undef ARGS
-#undef LOCAL_VARS
@@ -238,78 +222,84 @@ static void do_triangle( struct draw_context *draw,
* draw_vbuf.c code uses when it has to perform a flush.
*/
void draw_pipeline_run( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count )
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
- char *verts = (char *)vertices;
+ unsigned i, start;
+
+ draw->pipeline.verts = (char *)vert_info->verts;
+ draw->pipeline.vertex_stride = vert_info->stride;
+ draw->pipeline.vertex_count = vert_info->count;
+
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ const unsigned count = prim_info->primitive_lengths[i];
+
+#if DEBUG
+ /* Warn if one of the element indexes go outside the vertex buffer */
+ {
+ unsigned max_index = 0x0, i;
+ /* find the largest element index */
+ for (i = 0; i < count; i++) {
+ unsigned int index = prim_info->elts[start + i];
+ if (index > max_index)
+ max_index = index;
+ }
+ if (max_index >= vert_info->count) {
+ debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n",
+ __FUNCTION__,
+ max_index,
+ vert_info->count);
+ }
+ }
+#endif
+
+ pipe_run_elts(draw,
+ prim_info->prim,
+ prim_info->flags,
+ vert_info->verts,
+ vert_info->stride,
+ prim_info->elts + start,
+ count);
+ }
- draw->pipeline.verts = verts;
- draw->pipeline.vertex_stride = stride;
- draw->pipeline.vertex_count = vertex_count;
-
- pipe_run(draw, prim, vertices, stride, elts, count);
-
draw->pipeline.verts = NULL;
draw->pipeline.vertex_count = 0;
}
-
/*
* Set up macros for draw_pt_decompose.h template code.
- * This code is for non-indexed rendering (no elts).
+ * This code is for non-indexed (aka linear) rendering (no elts).
*/
-#define QUAD(i0,i1,i2,i3) \
- do_triangle( draw, \
- ( DRAW_PIPE_RESET_STIPPLE | \
- DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i3)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i2), \
- verts + stride * (i3))
-
-#define TRIANGLE(flags,i0,i1,i2) \
- do_triangle( draw, \
- flags, /* flags */ \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i2))
-
-#define LINE(flags,i0,i1) \
- do_line( draw, \
- flags, \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1))
-#define POINT(i0) \
- do_point( draw, \
- verts + stride * i0 )
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1), \
+ verts + stride * (i2) )
-#define FUNC pipe_run_linear
-#define ARGS \
- struct draw_context *draw, \
- unsigned prim, \
- struct vertex_header *vertices, \
- unsigned stride
+#define LINE(flags,i0,i1) \
+ do_line( draw, flags, \
+ verts + stride * (i0), \
+ verts + stride * (i1) )
-#define LOCAL_VARS \
- char *verts = (char *)vertices; \
- boolean flatfirst = (draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
- unsigned i; \
- ushort flags
+#define POINT(i0) \
+ do_point( draw, verts + stride * (i0) )
-#define FLUSH
+
+#define GET_ELT(idx) (idx)
+
+#define FUNC pipe_run_linear
+#define FUNC_VARS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ unsigned prim_flags, \
+ struct vertex_header *vertices, \
+ unsigned stride, \
+ unsigned count
#include "draw_pt_decompose.h"
@@ -318,17 +308,32 @@ void draw_pipeline_run( struct draw_context *draw,
* For drawing non-indexed primitives.
*/
void draw_pipeline_run_linear( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
- char *verts = (char *)vertices;
- draw->pipeline.verts = verts;
- draw->pipeline.vertex_stride = stride;
- draw->pipeline.vertex_count = count;
-
- pipe_run_linear(draw, prim, vertices, stride, count);
+ unsigned i, start;
+
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ unsigned count = prim_info->primitive_lengths[i];
+ char *verts = ((char*)vert_info->verts) +
+ (start * vert_info->stride);
+
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = vert_info->stride;
+ draw->pipeline.vertex_count = count;
+
+ assert(count <= vert_info->count);
+
+ pipe_run_linear(draw,
+ prim_info->prim,
+ prim_info->flags,
+ (struct vertex_header*)verts,
+ vert_info->stride,
+ count);
+ }
draw->pipeline.verts = NULL;
draw->pipeline.vertex_count = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 4faf0a779ca..d1aba763098 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -373,8 +373,7 @@ generate_aaline_fs(struct aaline_stage *aaline)
aaline->fs->sampler_unit = transform.freeSampler;
- aaline->fs->aaline_fs
- = aaline->driver_create_fs_state(pipe, &aaline_fs);
+ aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);
if (aaline->fs->aaline_fs == NULL)
goto fail;
@@ -425,7 +424,8 @@ aaline_create_texture(struct aaline_stage *aaline)
/* Fill in mipmap images.
* Basically each level is solid opaque, except for the outermost
- * texels which are zero. Special case the 1x1 and 2x2 levels.
+ * texels which are zero. Special case the 1x1 and 2x2 levels
+ * (though, those levels shouldn't be used - see the max_lod setting).
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
struct pipe_transfer *transfer;
@@ -497,7 +497,8 @@ aaline_create_sampler(struct aaline_stage *aaline)
sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
sampler.normalized_coords = 1;
sampler.min_lod = 0.0f;
- sampler.max_lod = MAX_TEXTURE_LEVEL;
+ /* avoid using the 1x1 and 2x2 mipmap levels */
+ sampler.max_lod = MAX_TEXTURE_LEVEL - 2;
aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
if (aaline->sampler_cso == NULL)
@@ -669,8 +670,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
assert(draw->rasterizer->line_smooth);
- if (draw->rasterizer->line_width <= 3.0)
- aaline->half_line_width = 1.5f;
+ if (draw->rasterizer->line_width <= 2.2)
+ aaline->half_line_width = 1.1f;
else
aaline->half_line_width = 0.5f * draw->rasterizer->line_width;
@@ -687,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
aaline->tex_slot = draw_current_shader_outputs(draw);
aaline->pos_slot = draw_current_shader_position_output(draw);;
- /* advertise the extra post-transformed vertex attribute */
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
- draw->extra_shader_outputs.slot = aaline->tex_slot;
+ /* allocate the extra post-transformed vertex attribute */
+ (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
+ aaline->fs->generic_attrib);
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -743,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
draw->suspend_flushing = FALSE;
- draw->extra_shader_outputs.slot = 0;
+ draw_remove_extra_vertex_attribs(draw);
}
@@ -788,9 +788,6 @@ draw_aaline_stage(struct draw_context *draw)
if (aaline == NULL)
return NULL;
- if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
- goto fail;
-
aaline->stage.draw = draw;
aaline->stage.name = "aaline";
aaline->stage.next = NULL;
@@ -801,11 +798,14 @@ draw_aaline_stage(struct draw_context *draw)
aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
aaline->stage.destroy = aaline_destroy;
+ if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
+ goto fail;
+
return aaline;
fail:
if (aaline)
- aaline_destroy(&aaline->stage);
+ aaline->stage.destroy(&aaline->stage);
return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index bba6f50c020..5ea552f51c1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
aapoint->pos_slot = draw_current_shader_position_output(draw);
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
- draw->extra_shader_outputs.slot = aapoint->tex_slot;
+ /* allocate the extra post-transformed vertex attribute */
+ (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
+ aapoint->fs->generic_attrib);
/* find psize slot in post-transform vertex */
aapoint->psize_slot = -1;
@@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
draw->suspend_flushing = FALSE;
- draw->extra_shader_outputs.slot = 0;
+ draw_remove_extra_vertex_attribs(draw);
}
@@ -780,9 +780,6 @@ draw_aapoint_stage(struct draw_context *draw)
if (aapoint == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
- goto fail;
-
aapoint->stage.draw = draw;
aapoint->stage.name = "aapoint";
aapoint->stage.next = NULL;
@@ -793,11 +790,14 @@ draw_aapoint_stage(struct draw_context *draw)
aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
aapoint->stage.destroy = aapoint_destroy;
+ if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
+ goto fail;
+
return aapoint;
fail:
if (aapoint)
- aapoint_destroy(&aapoint->stage);
+ aapoint->stage.destroy(&aapoint->stage);
return NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 51a6115ebf5..a10d8e9edc0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -68,8 +68,7 @@ struct clip_stage {
};
-/* This is a bit confusing:
- */
+/** Cast wrapper */
static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
{
return (struct clip_stage *)stage;
@@ -81,18 +80,22 @@ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
/* All attributes are float[4], so this is easy:
*/
-static void interp_attr( float *fdst,
+static void interp_attr( float dst[4],
float t,
- const float *fin,
- const float *fout )
+ const float in[4],
+ const float out[4] )
{
- fdst[0] = LINTERP( t, fout[0], fin[0] );
- fdst[1] = LINTERP( t, fout[1], fin[1] );
- fdst[2] = LINTERP( t, fout[2], fin[2] );
- fdst[3] = LINTERP( t, fout[3], fin[3] );
+ dst[0] = LINTERP( t, out[0], in[0] );
+ dst[1] = LINTERP( t, out[1], in[1] );
+ dst[2] = LINTERP( t, out[2], in[2] );
+ dst[3] = LINTERP( t, out[3], in[3] );
}
+/**
+ * Copy front/back, primary/secondary colors from src vertex to dst vertex.
+ * Used when flat shading.
+ */
static void copy_colors( struct draw_stage *stage,
struct vertex_header *dst,
const struct vertex_header *src )
@@ -121,20 +124,17 @@ static void interp( const struct clip_stage *clip,
/* Vertex header.
*/
- {
- dst->clipmask = 0;
- dst->edgeflag = 0; /* will get overwritten later */
- dst->pad = 0;
- dst->vertex_id = UNDEFINED_VERTEX_ID;
- }
+ dst->clipmask = 0;
+ dst->edgeflag = 0; /* will get overwritten later */
+ dst->pad = 0;
+ dst->vertex_id = UNDEFINED_VERTEX_ID;
- /* Clip coordinates: interpolate normally
+ /* Interpolate the clip-space coords.
*/
- {
- interp_attr(dst->clip, t, in->clip, out->clip);
- }
+ interp_attr(dst->clip, t, in->clip, out->clip);
- /* Do the projective divide and insert window coordinates:
+ /* Do the projective divide and viewport transformation to get
+ * new window coordinates:
*/
{
const float *pos = dst->clip;
@@ -157,6 +157,10 @@ static void interp( const struct clip_stage *clip,
}
+/**
+ * Emit a post-clip polygon to the next pipeline stage. The polygon
+ * will be convex and the provoking vertex will always be vertex[0].
+ */
static void emit_poly( struct draw_stage *stage,
struct vertex_header **inlist,
unsigned n,
@@ -164,10 +168,18 @@ static void emit_poly( struct draw_stage *stage,
{
struct prim_header header;
unsigned i;
+ ushort edge_first, edge_middle, edge_last;
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ if (stage->draw->rasterizer->flatshade_first) {
+ edge_first = DRAW_PIPE_EDGE_FLAG_0;
+ edge_middle = DRAW_PIPE_EDGE_FLAG_1;
+ edge_last = DRAW_PIPE_EDGE_FLAG_2;
+ }
+ else {
+ edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ }
/* later stages may need the determinant, but only the sign matters */
header.det = origPrim->det;
@@ -175,9 +187,17 @@ static void emit_poly( struct draw_stage *stage,
header.pad = 0;
for (i = 2; i < n; i++, header.flags = edge_middle) {
- header.v[0] = inlist[i-1];
- header.v[1] = inlist[i];
- header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
+ /* order the triangle verts to respect the provoking vertex mode */
+ if (stage->draw->rasterizer->flatshade_first) {
+ header.v[0] = inlist[0]; /* the provoking vertex */
+ header.v[1] = inlist[i-1];
+ header.v[2] = inlist[i];
+ }
+ else {
+ header.v[0] = inlist[i-1];
+ header.v[1] = inlist[i];
+ header.v[2] = inlist[0]; /* the provoking vertex */
+ }
if (i == n-1)
header.flags |= edge_last;
@@ -185,7 +205,8 @@ static void emit_poly( struct draw_stage *stage,
if (0) {
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint j, k;
- debug_printf("Clipped tri:\n");
+ debug_printf("Clipped tri: (flat-shade-first = %d)\n",
+ stage->draw->rasterizer->flatshade_first);
for (j = 0; j < 3; j++) {
for (k = 0; k < vs->info.num_outputs; k++) {
debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
@@ -241,6 +262,9 @@ do_clip_tri( struct draw_stage *stage,
clipmask &= ~(1<<plane_idx);
+ assert(n < MAX_CLIPPED_VERTICES);
+ if (n >= MAX_CLIPPED_VERTICES)
+ return;
inlist[n] = inlist[0]; /* prevent rotation of vertices */
for (i = 1; i <= n; i++) {
@@ -249,11 +273,23 @@ do_clip_tri( struct draw_stage *stage,
float dp = dot4( vert->clip, plane );
if (!IS_NEGATIVE(dp_prev)) {
+ assert(outcount < MAX_CLIPPED_VERTICES);
+ if (outcount >= MAX_CLIPPED_VERTICES)
+ return;
outlist[outcount++] = vert_prev;
}
if (DIFFERENT_SIGNS(dp, dp_prev)) {
- struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++];
+ struct vertex_header *new_vert;
+
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ new_vert = clipper->stage.tmp[tmpnr++];
+
+ assert(outcount < MAX_CLIPPED_VERTICES);
+ if (outcount >= MAX_CLIPPED_VERTICES)
+ return;
outlist[outcount++] = new_vert;
if (IS_NEGATIVE(dp)) {
@@ -291,18 +327,34 @@ do_clip_tri( struct draw_stage *stage,
}
}
- /* If flat-shading, copy color to new provoking vertex.
+ /* If flat-shading, copy provoking vertex color to polygon vertex[0]
*/
- if (clipper->flat && inlist[0] != header->v[2]) {
- inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
-
- copy_colors(stage, inlist[0], header->v[2]);
- }
-
- /* Emit the polygon as triangles to the setup stage:
- */
- if (n >= 3)
+ if (n >= 3) {
+ if (clipper->flat) {
+ if (stage->draw->rasterizer->flatshade_first) {
+ if (inlist[0] != header->v[0]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ copy_colors(stage, inlist[0], header->v[0]);
+ }
+ }
+ else {
+ if (inlist[0] != header->v[2]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ copy_colors(stage, inlist[0], header->v[2]);
+ }
+ }
+ }
+
+ /* Emit the polygon as triangles to the setup stage:
+ */
emit_poly( stage, inlist, n, header );
+ }
}
@@ -492,9 +544,6 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
if (clipper == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
- goto fail;
-
clipper->stage.draw = draw;
clipper->stage.name = "clipper";
clipper->stage.point = clip_point;
@@ -506,6 +555,9 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
clipper->plane = draw->plane;
+ if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ))
+ goto fail;
+
return &clipper->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index dc66c65a56c..2f4d01d23ab 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -40,7 +40,8 @@
struct cull_stage {
struct draw_stage stage;
- unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */
+ unsigned cull_face; /**< which face(s) to cull (one of PIPE_FACE_x) */
+ unsigned front_ccw;
};
@@ -73,9 +74,12 @@ static void cull_tri( struct draw_stage *stage,
/* if det < 0 then Z points toward the camera and the triangle is
* counter-clockwise winding.
*/
- unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
+ unsigned ccw = (header->det < 0);
+ unsigned face = ((ccw == cull_stage(stage)->front_ccw) ?
+ PIPE_FACE_FRONT :
+ PIPE_FACE_BACK);
- if ((winding & cull_stage(stage)->winding) == 0) {
+ if ((face & cull_stage(stage)->cull_face) == 0) {
/* triangle is not culled, pass to next stage */
stage->next->tri( stage->next, header );
}
@@ -88,7 +92,8 @@ static void cull_first_tri( struct draw_stage *stage,
{
struct cull_stage *cull = cull_stage(stage);
- cull->winding = stage->draw->rasterizer->cull_mode;
+ cull->cull_face = stage->draw->rasterizer->cull_face;
+ cull->front_ccw = stage->draw->rasterizer->front_ccw;
stage->tri = cull_tri;
stage->tri( stage, header );
@@ -124,9 +129,6 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw )
if (cull == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &cull->stage, 0 ))
- goto fail;
-
cull->stage.draw = draw;
cull->stage.name = "cull";
cull->stage.next = NULL;
@@ -137,6 +139,9 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw )
cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
cull->stage.destroy = cull_destroy;
+ if (!draw_alloc_temp_verts( &cull->stage, 0 ))
+ goto fail;
+
return &cull->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 34afb1a0b60..693f2895aad 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -257,9 +257,6 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
if (flatshade == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
- goto fail;
-
flatshade->stage.draw = draw;
flatshade->stage.name = "flatshade";
flatshade->stage.next = NULL;
@@ -270,6 +267,9 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter;
flatshade->stage.destroy = flatshade_destroy;
+ if (!draw_alloc_temp_verts( &flatshade->stage, 2 ))
+ goto fail;
+
return &flatshade->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index 8e321946ced..8afbbfa1569 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -161,9 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw )
{
struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
if (offset == NULL)
- return NULL;
-
- draw_alloc_temp_verts( &offset->stage, 3 );
+ goto fail;
offset->stage.draw = draw;
offset->stage.name = "offset";
@@ -175,5 +173,14 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw )
offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
offset->stage.destroy = offset_destroy;
+ if (!draw_alloc_temp_verts( &offset->stage, 3 ))
+ goto fail;
+
return &offset->stage;
+
+fail:
+ if (offset)
+ offset->stage.destroy( &offset->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index ef30db094fe..ed9a53e154d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -363,8 +363,12 @@ generate_pstip_fs(struct pstip_stage *pstip)
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
-
+
FREE((void *)pstip_fs.tokens);
+
+ if (!pstip->fs->pstip_fs)
+ return FALSE;
+
return TRUE;
}
@@ -603,12 +607,15 @@ pstip_destroy(struct draw_stage *stage)
}
+/** Create a new polygon stipple drawing stage object */
static struct pstip_stage *
-draw_pstip_stage(struct draw_context *draw)
+draw_pstip_stage(struct draw_context *draw, struct pipe_context *pipe)
{
struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage);
+ if (pstip == NULL)
+ goto fail;
- draw_alloc_temp_verts( &pstip->stage, 8 );
+ pstip->pipe = pipe;
pstip->stage.draw = draw;
pstip->stage.name = "pstip";
@@ -620,7 +627,16 @@ draw_pstip_stage(struct draw_context *draw)
pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter;
pstip->stage.destroy = pstip_destroy;
+ if (!draw_alloc_temp_verts( &pstip->stage, 8 ))
+ goto fail;
+
return pstip;
+
+fail:
+ if (pstip)
+ pstip->stage.destroy( &pstip->stage );
+
+ return NULL;
}
@@ -756,14 +772,12 @@ draw_install_pstipple_stage(struct draw_context *draw,
/*
* Create / install pgon stipple drawing / prim stage
*/
- pstip = draw_pstip_stage( draw );
+ pstip = draw_pstip_stage( draw, pipe );
if (pstip == NULL)
goto fail;
draw->pipeline.pstipple = &pstip->stage;
- pstip->pipe = pipe;
-
/* create special texture, sampler state */
if (!pstip_create_texture(pstip))
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 70fbab9ea76..4b3f4e7ae11 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -235,8 +235,8 @@ stipple_destroy( struct draw_stage *stage )
struct draw_stage *draw_stipple_stage( struct draw_context *draw )
{
struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
-
- draw_alloc_temp_verts( &stipple->stage, 2 );
+ if (stipple == NULL)
+ goto fail;
stipple->stage.draw = draw;
stipple->stage.name = "stipple";
@@ -248,5 +248,14 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw )
stipple->stage.flush = stipple_flush;
stipple->stage.destroy = stipple_destroy;
+ if (!draw_alloc_temp_verts( &stipple->stage, 2 ))
+ goto fail;
+
return &stipple->stage;
+
+fail:
+ if (stipple)
+ stipple->stage.destroy( &stipple->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index eef0238b157..9a3f3fee625 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -141,7 +141,7 @@ static void twoside_first_tri( struct draw_stage *stage,
* if the triangle is back-facing (negative).
* sign = -1 for CCW, +1 for CW
*/
- twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f;
+ twoside->sign = stage->draw->rasterizer->front_ccw ? -1.0f : 1.0f;
stage->tri = twoside_tri;
stage->tri( stage, header );
@@ -177,9 +177,6 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw )
if (twoside == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
- goto fail;
-
twoside->stage.draw = draw;
twoside->stage.name = "twoside";
twoside->stage.next = NULL;
@@ -190,6 +187,9 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw )
twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
twoside->stage.destroy = twoside_destroy;
+ if (!draw_alloc_temp_verts( &twoside->stage, 3 ))
+ goto fail;
+
return &twoside->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 03bb842e20a..d87741b91e7 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -105,6 +105,23 @@ static void lines( struct draw_stage *stage,
}
+/** For debugging */
+static void
+print_header_flags(unsigned flags)
+{
+ debug_printf("header->flags = ");
+ if (flags & DRAW_PIPE_RESET_STIPPLE)
+ debug_printf("RESET_STIPPLE ");
+ if (flags & DRAW_PIPE_EDGE_FLAG_0)
+ debug_printf("EDGE_FLAG_0 ");
+ if (flags & DRAW_PIPE_EDGE_FLAG_1)
+ debug_printf("EDGE_FLAG_1 ");
+ if (flags & DRAW_PIPE_EDGE_FLAG_2)
+ debug_printf("EDGE_FLAG_2 ");
+ debug_printf("\n");
+}
+
+
/* Unfilled tri:
*
* Note edgeflags in the vertex struct is not sufficient as we will
@@ -117,8 +134,12 @@ static void unfilled_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct unfilled_stage *unfilled = unfilled_stage(stage);
- unsigned mode = unfilled->mode[header->det >= 0.0];
+ unsigned cw = header->det >= 0.0;
+ unsigned mode = unfilled->mode[cw];
+ if (0)
+ print_header_flags(header->flags);
+
switch (mode) {
case PIPE_POLYGON_MODE_FILL:
stage->next->tri( stage->next, header );
@@ -139,9 +160,10 @@ static void unfilled_first_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct unfilled_stage *unfilled = unfilled_stage(stage);
+ const struct pipe_rasterizer_state *rast = stage->draw->rasterizer;
- unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */
- unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */
+ unfilled->mode[0] = rast->front_ccw ? rast->fill_front : rast->fill_back;
+ unfilled->mode[1] = rast->front_ccw ? rast->fill_back : rast->fill_front;
stage->tri = unfilled_tri;
stage->tri( stage, header );
@@ -180,9 +202,6 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
if (unfilled == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
- goto fail;
-
unfilled->stage.draw = draw;
unfilled->stage.name = "unfilled";
unfilled->stage.next = NULL;
@@ -194,6 +213,9 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
unfilled->stage.destroy = unfilled_destroy;
+ if (!draw_alloc_temp_verts( &unfilled->stage, 0 ))
+ goto fail;
+
return &unfilled->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index 2a50af7a414..c575a8ac7ca 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -122,12 +122,14 @@ draw_need_pipeline(const struct draw_context *draw,
return TRUE;
/* unfilled polygons */
- if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL)
+ if (rasterizer->fill_front != PIPE_POLYGON_MODE_FILL ||
+ rasterizer->fill_back != PIPE_POLYGON_MODE_FILL)
return TRUE;
/* polygon offset */
- if (rasterizer->offset_cw || rasterizer->offset_ccw)
+ if (rasterizer->offset_point ||
+ rasterizer->offset_line ||
+ rasterizer->offset_tri)
return TRUE;
/* two-side lighting */
@@ -170,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
&& !rast->line_smooth);
- /* drawing large points? */
+ /* drawing large/sprite points (but not AA points)? */
if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (rast->point_smooth && draw->pipeline.aapoint)
@@ -205,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
precalc_flat = TRUE;
}
- if (wide_points || rast->sprite_coord_enable) {
+ if (wide_points) {
draw->pipeline.wide_point->next = next;
next = draw->pipeline.wide_point;
}
@@ -222,8 +224,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.pstipple;
}
- if (rast->fill_cw != PIPE_POLYGON_MODE_FILL ||
- rast->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+ if (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
+ rast->fill_back != PIPE_POLYGON_MODE_FILL) {
draw->pipeline.unfilled->next = next;
next = draw->pipeline.unfilled;
precalc_flat = TRUE; /* only needed for triangles really */
@@ -235,8 +237,9 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.flatshade;
}
- if (rast->offset_cw ||
- rast->offset_ccw) {
+ if (rast->offset_point ||
+ rast->offset_line ||
+ rast->offset_tri) {
draw->pipeline.offset->next = next;
next = draw->pipeline.offset;
need_det = TRUE;
@@ -255,14 +258,14 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
* to less work emitting vertices, smaller vertex buffers, etc.
* It's difficult to say whether this will be true in general.
*/
- if (need_det || rast->cull_mode) {
+ if (need_det || rast->cull_face != PIPE_FACE_NONE) {
draw->pipeline.cull->next = next;
next = draw->pipeline.cull;
}
/* Clip stage
*/
- if (!draw->bypass_clipping)
+ if (draw->clip_xy || draw->clip_z || draw->clip_user)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index abbf6247ab8..58c5858734a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -159,19 +159,8 @@ vbuf_tri( struct draw_stage *stage,
check_space( vbuf, 3 );
- if (vbuf->stage.draw->rasterizer->flatshade_first) {
- /* Put provoking vertex in position expected by the driver.
- * Emit last provoking vertex in first pos.
- * Swap verts 0 & 1 to preserve polygon winding.
- */
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[2] );
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[1] );
- }
- else {
- for (i = 0; i < 3; i++) {
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
- }
+ for (i = 0; i < 3; i++) {
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
}
}
@@ -335,9 +324,9 @@ vbuf_flush_vertices( struct vbuf_stage *vbuf )
if (vbuf->nr_indices)
{
- vbuf->render->draw(vbuf->render,
- vbuf->indices,
- vbuf->nr_indices );
+ vbuf->render->draw_elements(vbuf->render,
+ vbuf->indices,
+ vbuf->nr_indices );
vbuf->nr_indices = 0;
}
@@ -364,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
- /* even number */
- vbuf->max_vertices = vbuf->max_vertices & ~1;
-
if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index ab167065815..98da9cfb999 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -77,8 +77,11 @@ static void wideline_line( struct draw_stage *stage,
const float dx = fabsf(pos0[0] - pos2[0]);
const float dy = fabsf(pos0[1] - pos2[1]);
+ const boolean gl_rasterization_rules =
+ stage->draw->rasterizer->gl_rasterization_rules;
+
/* small tweak to meet GL specification */
- const float bias = 0.125f;
+ const float bias = gl_rasterization_rules ? 0.125f : 0.0f;
/*
* Draw wide line as a quad (two tris) by "stretching" the line along
@@ -92,19 +95,21 @@ static void wideline_line( struct draw_stage *stage,
pos1[1] = pos1[1] + half_width - bias;
pos2[1] = pos2[1] - half_width - bias;
pos3[1] = pos3[1] + half_width - bias;
- if (pos0[0] < pos2[0]) {
- /* left to right line */
- pos0[0] -= 0.5f;
- pos1[0] -= 0.5f;
- pos2[0] -= 0.5f;
- pos3[0] -= 0.5f;
- }
- else {
- /* right to left line */
- pos0[0] += 0.5f;
- pos1[0] += 0.5f;
- pos2[0] += 0.5f;
- pos3[0] += 0.5f;
+ if (gl_rasterization_rules) {
+ if (pos0[0] < pos2[0]) {
+ /* left to right line */
+ pos0[0] -= 0.5f;
+ pos1[0] -= 0.5f;
+ pos2[0] -= 0.5f;
+ pos3[0] -= 0.5f;
+ }
+ else {
+ /* right to left line */
+ pos0[0] += 0.5f;
+ pos1[0] += 0.5f;
+ pos2[0] += 0.5f;
+ pos3[0] += 0.5f;
+ }
}
}
else {
@@ -113,19 +118,21 @@ static void wideline_line( struct draw_stage *stage,
pos1[0] = pos1[0] + half_width + bias;
pos2[0] = pos2[0] - half_width + bias;
pos3[0] = pos3[0] + half_width + bias;
- if (pos0[1] < pos2[1]) {
- /* top to bottom line */
- pos0[1] -= 0.5f;
- pos1[1] -= 0.5f;
- pos2[1] -= 0.5f;
- pos3[1] -= 0.5f;
- }
- else {
- /* bottom to top line */
- pos0[1] += 0.5f;
- pos1[1] += 0.5f;
- pos2[1] += 0.5f;
- pos3[1] += 0.5f;
+ if (gl_rasterization_rules) {
+ if (pos0[1] < pos2[1]) {
+ /* top to bottom line */
+ pos0[1] -= 0.5f;
+ pos1[1] -= 0.5f;
+ pos2[1] -= 0.5f;
+ pos3[1] -= 0.5f;
+ }
+ else {
+ /* bottom to top line */
+ pos0[1] += 0.5f;
+ pos1[1] += 0.5f;
+ pos2[1] += 0.5f;
+ pos3[1] += 0.5f;
+ }
}
}
@@ -195,8 +202,8 @@ static void wideline_destroy( struct draw_stage *stage )
struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
{
struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage);
-
- draw_alloc_temp_verts( &wide->stage, 4 );
+ if (wide == NULL)
+ goto fail;
wide->stage.draw = draw;
wide->stage.name = "wide-line";
@@ -208,5 +215,14 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
wide->stage.reset_stipple_counter = wideline_reset_stipple_counter;
wide->stage.destroy = wideline_destroy;
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
+
return &wide->stage;
+
+fail:
+ if (wide)
+ wide->stage.destroy( &wide->stage );
+
+ return NULL;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index a86fe19586c..3646c6a7145 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -57,26 +57,24 @@
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
+#include "draw_fs.h"
#include "draw_vs.h"
#include "draw_pipe.h"
struct widepoint_stage {
- struct draw_stage stage;
+ struct draw_stage stage; /**< base class */
float half_point_size;
float xbias;
float ybias;
- uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
- uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS];
- uint num_texcoords;
- uint texcoord_mode;
+ /** for automatic texcoord generation/replacement */
+ uint num_texcoord_gen;
+ uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS];
int psize_slot;
-
- int point_coord_fs_input; /**< input for pointcoord */
};
@@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage )
static void set_texcoords(const struct widepoint_stage *wide,
struct vertex_header *v, const float tc[4])
{
+ const struct draw_context *draw = wide->stage.draw;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
+ const uint texcoord_mode = rast->sprite_coord_mode;
uint i;
- for (i = 0; i < wide->num_texcoords; i++) {
- if (wide->texcoord_enable[i]) {
- uint j = wide->texcoord_slot[i];
- v->data[j][0] = tc[0];
- if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
- v->data[j][1] = 1.0f - tc[1];
- else
- v->data[j][1] = tc[1];
- v->data[j][2] = tc[2];
- v->data[j][3] = tc[3];
- }
- }
- if (wide->point_coord_fs_input >= 0) {
- /* put gl_PointCoord into the extra vertex slot */
- uint slot = wide->stage.draw->extra_shader_outputs.slot;
+ for (i = 0; i < wide->num_texcoord_gen; i++) {
+ const uint slot = wide->texcoord_gen_slot[i];
v->data[slot][0] = tc[0];
- if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
v->data[slot][1] = 1.0f - tc[1];
else
v->data[slot][1] = tc[1];
- v->data[slot][2] = 0.0F;
- v->data[slot][3] = 1.0F;
+ v->data[slot][2] = tc[2];
+ v->data[slot][3] = tc[3];
}
}
@@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage,
}
-static int
-find_pntc_input_attrib(struct draw_context *draw)
-{
- /* Scan the fragment program's input decls to find the pointcoord
- * attribute. The xy components will store the point coord.
- */
- return 0; /* XXX fix this */
-}
-
-
-static void widepoint_first_point( struct draw_stage *stage,
- struct prim_header *header )
+static void
+widepoint_first_point(struct draw_stage *stage,
+ struct prim_header *header)
{
struct widepoint_stage *wide = widepoint_stage(stage);
struct draw_context *draw = stage->draw;
@@ -226,6 +205,7 @@ static void widepoint_first_point( struct draw_stage *stage,
if (rast->gl_rasterization_rules) {
wide->xbias = 0.125;
+ wide->ybias = -0.125;
}
/* Disable triangle culling, stippling, unfilled mode etc. */
@@ -243,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage,
stage->point = draw_pipe_passthrough_point;
}
+ draw_remove_extra_vertex_attribs(draw);
+
if (rast->point_quad_rasterization) {
- /* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
- uint i, j = 0;
- wide->texcoord_mode = rast->sprite_coord_mode;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- wide->texcoord_slot[j] = i;
- wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1;
- j++;
+ const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
+ uint i;
+
+ wide->num_texcoord_gen = 0;
+
+ /* Loop over fragment shader inputs looking for generic inputs
+ * for which bit 'k' in sprite_coord_enable is set.
+ */
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+ const int generic_index = fs->info.input_semantic_index[i];
+ /* Note that sprite_coord enable is a bitfield of
+ * PIPE_MAX_SHADER_OUTPUTS bits.
+ */
+ if (generic_index < PIPE_MAX_SHADER_OUTPUTS &&
+ (rast->sprite_coord_enable & (1 << generic_index))) {
+ /* OK, this generic attribute needs to be replaced with a
+ * texcoord (see above).
+ */
+ int slot = draw_find_shader_output(draw,
+ TGSI_SEMANTIC_GENERIC,
+ generic_index);
+
+ if (slot > 0) {
+ /* there's already a post-vertex shader attribute
+ * for this fragment shader input attribute.
+ */
+ }
+ else {
+ /* need to allocate a new post-vertex shader attribute */
+ slot = draw_alloc_extra_vertex_attrib(draw,
+ TGSI_SEMANTIC_GENERIC,
+ generic_index);
+ }
+
+ /* add this slot to the texcoord-gen list */
+ wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
+ }
}
}
- wide->num_texcoords = j;
-
- /* find fragment shader PointCoord input */
- wide->point_coord_fs_input = find_pntc_input_attrib(draw);
-
- /* setup extra vp output (point coord implemented as a texcoord) */
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = 0;
- draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
- }
- else {
- wide->point_coord_fs_input = -1;
- draw->extra_shader_outputs.slot = 0;
}
wide->psize_slot = -1;
@@ -294,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )
stage->point = widepoint_first_point;
stage->next->flush( stage->next, flags );
- stage->draw->extra_shader_outputs.slot = 0;
+
+ draw_remove_extra_vertex_attribs(draw);
/* restore original rasterizer state */
if (draw->rast_handle) {
@@ -324,9 +323,6 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
if (wide == NULL)
goto fail;
- if (!draw_alloc_temp_verts( &wide->stage, 4 ))
- goto fail;
-
wide->stage.draw = draw;
wide->stage.name = "wide-point";
wide->stage.next = NULL;
@@ -337,6 +333,9 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter;
wide->stage.destroy = widepoint_destroy;
+ if (!draw_alloc_temp_verts( &wide->stage, 4 ))
+ goto fail;
+
return &wide->stage;
fail:
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index a2bfb693c09..d417f825a0f 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -48,6 +48,7 @@
#ifdef HAVE_LLVM
#include <llvm-c/ExecutionEngine.h>
+struct draw_llvm;
#endif
@@ -81,6 +82,9 @@ struct vertex_header {
#define UNDEFINED_VERTEX_ID 0xffff
+/* maximum number of shader variants we can cache */
+#define DRAW_MAX_SHADER_VARIANTS 1024
+
/**
* Private context for the drawing module.
*/
@@ -136,8 +140,7 @@ struct draw_context
} middle;
struct {
- struct draw_pt_front_end *vcache;
- struct draw_pt_front_end *varray;
+ struct draw_pt_front_end *vsplit;
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -146,6 +149,8 @@ struct draw_context
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
+ struct pipe_index_buffer index_buffer;
+
/* user-space vertex data, buffers */
struct {
/** vertex element/index buffer (ex: glDrawElements) */
@@ -159,9 +164,11 @@ struct draw_context
/** vertex arrays */
const void *vbuffer[PIPE_MAX_ATTRIBS];
- /** constant buffer (for vertex/geometry shader) */
+ /** constant buffers (for vertex/geometry shader) */
const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS];
const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS];
+ unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS];
} user;
boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */
@@ -169,13 +176,19 @@ struct draw_context
} pt;
struct {
- boolean bypass_clipping;
- boolean bypass_vs;
+ boolean bypass_clip_xy;
+ boolean bypass_clip_z;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
- boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+
+ /* Flags set if API requires clipping in these planes and the
+ * driver doesn't indicate that it can do it for us.
+ */
+ boolean clip_xy;
+ boolean clip_z;
+ boolean clip_user;
boolean force_passthrough; /**< never clip or shade */
@@ -194,6 +207,7 @@ struct draw_context
struct pipe_viewport_state viewport;
boolean identity_viewport;
+ /** Vertex shader state */
struct {
struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
@@ -223,6 +237,7 @@ struct draw_context
struct translate_cache *emit_cache;
} vs;
+ /** Geometry shader state */
struct {
struct draw_geometry_shader *geometry_shader;
uint num_gs_outputs; /**< convenience, from geometry_shader */
@@ -235,17 +250,31 @@ struct draw_context
struct tgsi_sampler **samplers;
} gs;
+ /** Fragment shader state */
+ struct {
+ struct draw_fragment_shader *fragment_shader;
+ } fs;
+
+ /** Stream output (vertex feedback) state */
+ struct {
+ struct pipe_stream_output_state state;
+ void *buffers[PIPE_MAX_SO_BUFFERS];
+ uint num_buffers;
+ } so;
+
/* Clip derived state:
*/
float plane[12][4];
unsigned nr_planes;
+ boolean depth_clamp;
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
struct {
- uint semantic_name;
- uint semantic_index;
- int slot;
+ uint num;
+ uint semantic_name[10];
+ uint semantic_index[10];
+ uint slot[10];
} extra_shader_outputs;
unsigned reduced_prim;
@@ -253,12 +282,51 @@ struct draw_context
unsigned instance_id;
#ifdef HAVE_LLVM
+ struct draw_llvm *llvm;
LLVMExecutionEngineRef engine;
#endif
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_sampler_views;
+ const struct pipe_sampler_state *samplers[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_samplers;
+
void *driver_private;
};
+
+struct draw_fetch_info {
+ boolean linear;
+ unsigned start;
+ const unsigned *elts;
+ unsigned count;
+};
+
+struct draw_vertex_info {
+ struct vertex_header *verts;
+ unsigned vertex_size;
+ unsigned stride;
+ unsigned count;
+};
+
+/* these flags are set if the primitive is a segment of a larger one */
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER 0x2
+
+struct draw_prim_info {
+ boolean linear;
+ unsigned start;
+
+ const ushort *elts;
+ unsigned count;
+
+ unsigned prim;
+ unsigned flags;
+ unsigned *primitive_lengths;
+ unsigned primitive_count;
+};
+
+
/*******************************************************************************
* Draw common initialization code
*/
@@ -300,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw );
uint draw_current_shader_outputs(const struct draw_context *draw);
uint draw_current_shader_position_output(const struct draw_context *draw);
+int draw_alloc_extra_vertex_attrib(struct draw_context *draw,
+ uint semantic_name, uint semantic_index);
+void draw_remove_extra_vertex_attribs(struct draw_context *draw);
+
+
/*******************************************************************************
* Vertex processing (was passthrough) code:
*/
@@ -319,35 +392,24 @@ void draw_pipeline_destroy( struct draw_context *draw );
-/* We use the top few bits in the elts[] parameter to convey a little
- * API information. This limits the number of vertices we can address
- * to only 4096 -- if that becomes a problem, we can switch to 32-bit
- * draw indices.
- *
- * These flags expected at first vertex of lines & triangles when
- * unfilled and/or line stipple modes are operational.
+/*
+ * These flags are used by the pipeline when unfilled and/or line stipple modes
+ * are operational.
*/
-#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
-#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
-#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
-#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
-#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+#define DRAW_PIPE_EDGE_FLAG_0 0x1
+#define DRAW_PIPE_EDGE_FLAG_1 0x2
+#define DRAW_PIPE_EDGE_FLAG_2 0x4
+#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
+#define DRAW_PIPE_RESET_STIPPLE 0x8
void draw_pipeline_run( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count );
+ const struct draw_vertex_info *vert,
+ const struct draw_prim_info *prim);
void draw_pipeline_run_linear( struct draw_context *draw,
- unsigned prim,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride );
+ const struct draw_vertex_info *vert,
+ const struct draw_prim_info *prim);
+
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index b853f3a89f8..f44bf2507c6 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -31,30 +31,22 @@
*/
#include "draw/draw_context.h"
+#include "draw/draw_gs.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_math.h"
#include "util/u_prim.h"
+#include "util/u_format.h"
+#include "util/u_draw.h"
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
-#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
-#endif
-
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- if (count < first)
- return 0;
- return count - (count - first) % incr;
-}
-
-
/* Overall we split things into:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - frontend -- prepare fetch_elts, draw_elts - eg vsplit
* - middle -- fetch, shade, cliptest, viewport
* - pipeline -- the prim pipeline: clipping, wide lines, etc
* - backend -- the vbuf_render provided by the driver.
@@ -74,29 +66,35 @@ draw_pt_arrays(struct draw_context *draw,
{
unsigned first, incr;
draw_pt_split_prim(prim, &first, &incr);
- count = trim(count, first, incr);
+ count = draw_pt_trim_count(count, first, incr);
if (count < first)
return TRUE;
}
if (!draw->force_passthrough) {
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
if (!draw->render) {
opt |= PT_PIPELINE;
}
-
+
if (draw_need_pipeline(draw,
draw->rasterizer,
- prim)) {
+ gs_out_prim)) {
opt |= PT_PIPELINE;
}
- if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ if ((draw->clip_xy ||
+ draw->clip_z ||
+ draw->clip_user) && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
-
+
opt |= PT_SHADE;
}
-
+
if (draw->pt.middle.llvm) {
middle = draw->pt.middle.llvm;
} else {
@@ -108,22 +106,11 @@ draw_pt_arrays(struct draw_context *draw,
middle = draw->pt.middle.general;
}
-
- /* Pick the right frontend
- */
- if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
- frontend = draw->pt.front.vcache;
- } else {
- frontend = draw->pt.front.varray;
- }
+ frontend = draw->pt.front.vsplit;
frontend->prepare( frontend, prim, middle, opt );
- frontend->run(frontend,
- draw_pt_elt_func(draw),
- draw_pt_elt_ptr(draw, start),
- draw->pt.user.eltBias,
- count);
+ frontend->run(frontend, start, count);
frontend->finish( frontend );
@@ -136,12 +123,8 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.test_fse = debug_get_option_draw_fse();
draw->pt.no_fse = debug_get_option_draw_no_fse();
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
- return FALSE;
-
- draw->pt.front.varray = draw_pt_varray(draw);
- if (!draw->pt.front.varray)
+ draw->pt.front.vsplit = draw_pt_vsplit(draw);
+ if (!draw->pt.front.vsplit)
return FALSE;
draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -157,7 +140,7 @@ boolean draw_pt_init( struct draw_context *draw )
return FALSE;
#if HAVE_LLVM
- if (debug_get_option_draw_use_llvm())
+ if (draw->llvm)
draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
#endif
@@ -187,14 +170,9 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
- if (draw->pt.front.vcache) {
- draw->pt.front.vcache->destroy( draw->pt.front.vcache );
- draw->pt.front.vcache = NULL;
- }
-
- if (draw->pt.front.varray) {
- draw->pt.front.varray->destroy( draw->pt.front.varray );
- draw->pt.front.varray = NULL;
+ if (draw->pt.front.vsplit) {
+ draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
+ draw->pt.front.vsplit = NULL;
}
}
@@ -214,24 +192,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
uint ii = 0;
uint j;
- if (draw->pt.user.elts) {
+ if (draw->pt.user.eltSize) {
+ const char *elts;
+
/* indexed arrays */
+ elts = (const char *) draw->pt.user.elts;
+ elts += draw->pt.index_buffer.offset;
+
switch (draw->pt.user.eltSize) {
case 1:
{
- const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+ const ubyte *elem = (const ubyte *) elts;
ii = elem[start + i];
}
break;
case 2:
{
- const ushort *elem = (const ushort *) draw->pt.user.elts;
+ const ushort *elem = (const ushort *) elts;
ii = elem[start + i];
}
break;
case 4:
{
- const uint *elem = (const uint *) draw->pt.user.elts;
+ const uint *elem = (const uint *) elts;
ii = elem[start + i];
}
break;
@@ -252,6 +235,12 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
for (j = 0; j < draw->pt.nr_vertex_elements; j++) {
uint buf = draw->pt.vertex_element[j].vertex_buffer_index;
ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf];
+
+ if (draw->pt.vertex_element[j].instance_divisor) {
+ ii = draw->instance_id / draw->pt.vertex_element[j].instance_divisor;
+ }
+
+ ptr += draw->pt.vertex_buffer[buf].buffer_offset;
ptr += draw->pt.vertex_buffer[buf].stride * ii;
ptr += draw->pt.vertex_element[j].src_offset;
@@ -260,31 +249,38 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
case PIPE_FORMAT_R32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f @ %p\n", v[0], (void *) v);
+ debug_printf("R %f @ %p\n", v[0], (void *) v);
}
break;
case PIPE_FORMAT_R32G32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v);
+ debug_printf("RG %f %f @ %p\n", v[0], v[1], (void *) v);
}
break;
case PIPE_FORMAT_R32G32B32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
+ debug_printf("RGB %f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
}
break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
+ debug_printf("RGBA %f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
(void *) v);
}
break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ {
+ ubyte *u = (ubyte *) ptr;
+ debug_printf("BGRA %d %d %d %d @ %p\n", u[0], u[1], u[2], u[3],
+ (void *) u);
+ }
+ break;
default:
- debug_printf("other format (fix me)\n");
- ;
+ debug_printf("other format %s (fix me)\n",
+ util_format_name(draw->pt.vertex_element[j].src_format));
}
}
}
@@ -292,11 +288,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
/**
- * Draw vertex arrays
- * This is the main entrypoint into the drawing module.
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
+ * Non-instanced drawing.
+ * \sa draw_arrays_instanced
*/
void
draw_arrays(struct draw_context *draw, unsigned prim,
@@ -305,6 +298,11 @@ draw_arrays(struct draw_context *draw, unsigned prim,
draw_arrays_instanced(draw, prim, start, count, 0, 1);
}
+
+/**
+ * Instanced drawing.
+ * \sa draw_vbo
+ */
void
draw_arrays_instanced(struct draw_context *draw,
unsigned mode,
@@ -313,40 +311,90 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount)
{
- unsigned reduced_prim = u_reduced_prim(mode);
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.start_instance = startInstance;
+ info.instance_count = instanceCount;
+
+ info.indexed = (draw->pt.user.elts != NULL);
+ if (!info.indexed) {
+ info.min_index = start;
+ info.max_index = start + count - 1;
+ }
+
+ draw_vbo(draw, &info);
+}
+
+
+/**
+ * Draw vertex arrays.
+ * This is the main entrypoint into the drawing module. If drawing an indexed
+ * primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer()
+ * functions should have already been called to specify the element/index
+ * buffer information.
+ */
+void
+draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info)
+{
+ unsigned reduced_prim = u_reduced_prim(info->mode);
unsigned instance;
+ assert(info->instance_count > 0);
+ if (info->indexed)
+ assert(draw->pt.user.elts);
+
+ draw->pt.user.eltSize =
+ (info->indexed) ? draw->pt.index_buffer.index_size : 0;
+
+ draw->pt.user.eltBias = info->index_bias;
+ draw->pt.user.min_index = info->min_index;
+ draw->pt.user.max_index = info->max_index;
+
if (reduced_prim != draw->reduced_prim) {
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
draw->reduced_prim = reduced_prim;
}
if (0)
- draw_print_arrays(draw, mode, start, MIN2(count, 20));
+ debug_printf("draw_vbo(mode=%u start=%u count=%u):\n",
+ info->mode, info->start, info->count);
-#if 0
- {
- int i;
- debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
- mode, start, count);
+ if (0)
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
+
+ if (0) {
+ unsigned int i;
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- debug_printf(" format=%s\n",
+ debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n",
+ i,
+ draw->pt.vertex_element[i].src_offset,
+ draw->pt.vertex_element[i].instance_divisor,
+ draw->pt.vertex_element[i].vertex_buffer_index,
util_format_name(draw->pt.vertex_element[i].src_format));
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" stride=%u offset=%u ptr=%p\n",
+ debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n",
+ i,
draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
}
-#endif
- for (instance = 0; instance < instanceCount; instance++) {
- draw->instance_id = instance + startInstance;
- draw_pt_arrays(draw, mode, start, count);
+ if (0)
+ draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20));
+
+ for (instance = 0; instance < info->instance_count; instance++) {
+ draw->instance_id = instance + info->start_instance;
+ draw_pt_arrays(draw, info->mode, info->start, info->count);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 3e3ea320cc0..5fbb4242915 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -35,10 +35,10 @@
#include "pipe/p_compiler.h"
-typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
-
struct draw_pt_middle_end;
struct draw_context;
+struct draw_prim_info;
+struct draw_vertex_info;
#define PT_SHADE 0x1
@@ -50,13 +50,18 @@ struct draw_context;
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
- * Currenly one version of this:
- * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
- * Later:
- * - varray, varray_split
- * - velement, velement_split
+ * The fetch elements are indices to the vertices. The draw elements are
+ * indices to the fetched vertices. When both arrays of elements are both
+ * linear, middle->run_linear is called; When only the fetch elements are
+ * linear, middle->run_linear_elts is called; Otherwise, middle->run is
+ * called.
+ *
+ * When the number of the draw elements exceeds max_vertex of the middle end,
+ * the draw elements (as well as the fetch elements) are splitted and the
+ * middle end is called multiple times.
*
- * Currenly only using the vcache version.
+ * Currenly there is:
+ * - vsplit - catchall implementation, splits big prims
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
@@ -65,9 +70,7 @@ struct draw_pt_front_end {
unsigned opt );
void (*run)( struct draw_pt_front_end *,
- pt_elt_func elt_func,
- const void *elt_ptr,
- int elt_bias,
+ unsigned start,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -78,6 +81,8 @@ struct draw_pt_front_end {
/* The "middle end" - prepares actual hardware vertices for the
* hardware backend.
*
+ * prim_flags is as defined by pipe_draw_info::flags.
+ *
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
@@ -92,11 +97,13 @@ struct draw_pt_middle_end {
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
- unsigned count);
+ unsigned count,
+ unsigned prim_flags );
/* Transform all vertices in a linear range and then draw them with
* the supplied element list. May fail and return FALSE.
@@ -105,7 +112,8 @@ struct draw_pt_middle_end {
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
int (*get_max_vertex_count)( struct draw_pt_middle_end * );
@@ -120,19 +128,11 @@ struct vbuf_render;
struct vertex_header;
-/* Helper functions.
- */
-pt_elt_func draw_pt_elt_func( struct draw_context *draw );
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start );
-
/* Frontends:
*
- * Currently only the general-purpose vcache implementation, could add
- * a special case for tiny vertex buffers.
+ * Currently only the general-purpose vsplit implementation.
*/
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
/* Middle-ends:
@@ -162,21 +162,31 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned *max_vertices );
void draw_pt_emit( struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count );
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info);
void draw_pt_emit_linear( struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned stride,
- unsigned count );
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info);
void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
+/*******************************************************************************
+ * HW stream output emit:
+ */
+struct pt_so_emit;
+
+void draw_pt_so_emit_prepare( struct pt_so_emit *emit );
+
+void draw_pt_so_emit( struct pt_so_emit *emit,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info );
+
+void draw_pt_so_emit_destroy( struct pt_so_emit *emit );
+
+struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw );
/*******************************************************************************
* API vertex fetch:
@@ -208,12 +218,12 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
struct pt_post_vs;
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
- struct vertex_header *pipeline_verts,
- unsigned stride,
- unsigned count );
+ struct draw_vertex_info *info );
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags );
@@ -227,6 +237,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
index 3c44f7c11ee..3127aad7310 100644
--- a/src/gallium/auxiliary/draw/draw_pt_decompose.h
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -1,162 +1,7 @@
+#define LOCAL_VARS \
+ char *verts = (char *) vertices; \
+ const boolean last_vertex_last = \
+ !(draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first);
-
-static void FUNC( ARGS,
- unsigned count )
-{
- LOCAL_VARS;
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT( (i + 0) );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( DRAW_PIPE_RESET_STIPPLE,
- (i + 0),
- (i + 1));
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (i = 1; i < count; i++, flags = 0) {
- LINE( flags,
- (i - 1),
- (i ));
- }
-
- LINE( flags,
- (i - 1),
- (0 ));
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE( flags,
- (i - 1),
- (i ));
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- if (flatfirst) {
- /* put provoking vertex in last pos for clipper */
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 1),
- (i + 2),
- (i + 0 ));
- }
- else {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0),
- (i + 1),
- (i + 2 ));
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 1 + (i&1)),
- (i + 2 - (i&1)),
- (i + 0) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0 + (i&1)),
- (i + 1 - (i&1)),
- (i + 2 ));
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 2),
- 0,
- (i + 1) );
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (0),
- (i + 1),
- (i + 2 ));
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD( (i + 0),
- (i + 1),
- (i + 2),
- (i + 3));
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD( (i + 2),
- (i + 0),
- (i + 1),
- (i + 3));
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
-
- for (i = 0; i+2 < count; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE( flags,
- (i + 1),
- (i + 2),
- (0));
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- FLUSH;
-}
-
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
deleted file mode 100644
index 88f4d9f495a..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "draw/draw_pt.h"
-#include "draw/draw_private.h"
-
-/* Neat get_elt func that also works for varrays drawing by encoding
- * the start value into a pointer.
- */
-
-static unsigned elt_uint( const void *elts, unsigned idx )
-{
- return *(((const uint *)elts) + idx);
-}
-
-static unsigned elt_ushort( const void *elts, unsigned idx )
-{
- return *(((const ushort *)elts) + idx);
-}
-
-static unsigned elt_ubyte( const void *elts, unsigned idx )
-{
- return *(((const ubyte *)elts) + idx);
-}
-
-static unsigned elt_vert( const void *elts, unsigned idx )
-{
- /* unsigned index is packed in the pointer */
- return (unsigned)(uintptr_t)elts + idx;
-}
-
-pt_elt_func draw_pt_elt_func( struct draw_context *draw )
-{
- switch (draw->pt.user.eltSize) {
- case 0: return &elt_vert;
- case 1: return &elt_ubyte;
- case 2: return &elt_ushort;
- case 4: return &elt_uint;
- default: return NULL;
- }
-}
-
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start )
-{
- const char *elts = draw->pt.user.elts;
-
- switch (draw->pt.user.eltSize) {
- case 0:
- return (const void *)(((const ubyte *)NULL) + start);
- case 1:
- return (const void *)(((const ubyte *)elts) + start);
- case 2:
- return (const void *)(((const ushort *)elts) + start);
- case 4:
- return (const void *)(((const uint *)elts) + start);
- default:
- return NULL;
- }
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index ad48fa39a4f..c8dfc16911e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -120,22 +120,21 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* even number */
- *max_vertices = *max_vertices & ~1;
}
void draw_pt_emit( struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned vertex_count,
- unsigned stride,
- const ushort *elts,
- unsigned count )
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
+ const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data;
+ unsigned vertex_count = vert_info->count;
+ unsigned stride = vert_info->stride;
+ const ushort *elts = prim_info->elts;
struct draw_context *draw = emit->draw;
struct translate *translate = emit->translate;
struct vbuf_render *render = draw->render;
+ unsigned start, i;
void *hw_verts;
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
@@ -145,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit,
if (vertex_count == 0)
return;
- if (vertex_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -180,6 +174,7 @@ void draw_pt_emit( struct pt_emit *emit,
0,
~0);
+ /* fetch/translate vertex attribs to fill hw_verts[] */
translate->run( translate,
0,
vertex_count,
@@ -190,23 +185,31 @@ void draw_pt_emit( struct pt_emit *emit,
0,
vertex_count - 1 );
- render->draw(render,
- elts,
- count);
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ render->draw_elements(render,
+ elts + start,
+ prim_info->primitive_lengths[i]);
+ }
render->release_vertices(render);
}
void draw_pt_emit_linear(struct pt_emit *emit,
- const float (*vertex_data)[4],
- unsigned stride,
- unsigned count)
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
{
+ const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data;
+ unsigned stride = vert_info->stride;
+ unsigned count = vert_info->count;
struct draw_context *draw = emit->draw;
struct translate *translate = emit->translate;
struct vbuf_render *render = draw->render;
void *hw_verts;
+ unsigned start, i;
#if 0
debug_printf("Linear emit\n");
@@ -215,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -258,7 +258,14 @@ void draw_pt_emit_linear(struct pt_emit *emit,
render->unmap_vertices( render, 0, count - 1 );
- render->draw_arrays(render, 0, count);
+ for (start = i = 0;
+ i < prim_info->primitive_count;
+ start += prim_info->primitive_lengths[i], i++)
+ {
+ render->draw_arrays(render,
+ start,
+ prim_info->primitive_lengths[i]);
+ }
render->release_vertices(render);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index a1347221b5d..ae12ee24bdc 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -29,7 +29,6 @@
#include "util/u_math.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
-#include "draw/draw_vbuf.h"
#include "draw/draw_pt.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
@@ -69,31 +68,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
fetch->vertex_size = vertex_size;
- /* Always emit/leave space for a vertex header.
- *
- * It's worth considering whether the vertex headers should contain
- * a pointer to the 'data', rather than having it inline.
- * Something to look at after we've fully switched over to the pt
- * paths.
+ /* Leave the clipmask/edgeflags/pad/vertex_id untouched
*/
- {
- /* Need to set header->vertex_id = 0xffff somehow.
- */
- key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
- key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
- key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
- key.element[nr].input_offset = 0;
- key.element[nr].instance_divisor = 0;
- key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
- key.element[nr].output_offset = dst_offset;
- dst_offset += 1 * sizeof(float);
- nr++;
-
-
- /* Just leave the clip[] array untouched.
- */
- dst_offset += 4 * sizeof(float);
- }
+ dst_offset += 1 * sizeof(float);
+ /* Just leave the clip[] array untouched.
+ */
+ dst_offset += 4 * sizeof(float);
if (instance_id_index != ~0) {
num_extra_inputs++;
@@ -132,26 +112,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
key.nr_elements = nr;
key.output_stride = vertex_size;
-
if (!fetch->translate ||
translate_key_compare(&fetch->translate->key, &key) != 0)
{
translate_key_sanitize(&key);
fetch->translate = translate_cache_find(fetch->cache, &key);
-
- {
- static struct vertex_header vh = { 0,
- 1,
- 0,
- UNDEFINED_VERTEX_ID,
- { .0f, .0f, .0f, .0f } };
-
- fetch->translate->set_buffer(fetch->translate,
- draw->pt.nr_vertex_buffers,
- &vh,
- 0,
- ~0);
- }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index d7735bf1ac9..e706b7796f8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -36,6 +36,7 @@
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
+#include "draw/draw_gs.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
@@ -100,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
boolean ok;
struct translate_key key;
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
+
ok = draw->render->set_primitive( draw->render,
- prim );
+ gs_out_prim );
if (!ok) {
assert(0);
return;
@@ -185,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
}
@@ -204,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -214,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
@@ -254,9 +247,9 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
/* Done -- that was easy, wasn't it:
*/
@@ -267,7 +260,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -277,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -328,7 +319,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -338,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -363,9 +352,9 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
/* XXX: Draw arrays path to avoid re-emitting index list again and
* again.
*/
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
/* Done -- that was easy, wasn't it:
*/
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index cbb5b6c9605..7c198c6026d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -67,9 +67,8 @@ struct fetch_shade_emit {
-
static void fse_prepare( struct draw_pt_middle_end *middle,
- unsigned prim,
+ unsigned prim,
unsigned opt,
unsigned *max_vertices )
{
@@ -79,9 +78,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
const struct vertex_info *vinfo;
unsigned i;
unsigned nr_vbs = 0;
-
- if (!draw->render->set_primitive( draw->render,
+ /* Can't support geometry shader on this path.
+ */
+ assert(!draw->gs.geometry_shader);
+
+ if (!draw->render->set_primitive( draw->render,
prim )) {
assert(0);
return;
@@ -90,7 +92,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
/* Must do this after set_primitive() above:
*/
fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
-
fse->key.output_stride = vinfo->size * 4;
@@ -101,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.nr_inputs); /* inputs - fetch from api format */
fse->key.viewport = !draw->identity_viewport;
- fse->key.clip = !draw->bypass_clipping;
+ fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user;
fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
@@ -174,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
-
/* Probably need to do this somewhere (or fix exec shader not to
* need it):
*/
@@ -196,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
static void fse_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -206,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -264,7 +254,8 @@ fse_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -274,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)fetch_count ))
@@ -307,9 +295,9 @@ fse_run(struct draw_pt_middle_end *middle,
draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
draw->render->release_vertices( draw->render );
@@ -326,7 +314,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -336,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -357,9 +343,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
hw_verts );
- draw->render->draw( draw->render,
- draw_elts,
- draw_count );
+ draw->render->draw_elements( draw->render,
+ draw_elts,
+ draw_count );
draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index da5106463a7..b72fd612451 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -27,6 +27,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
@@ -40,16 +41,16 @@ struct fetch_pipeline_middle_end {
struct draw_context *draw;
struct pt_emit *emit;
+ struct pt_so_emit *so_emit;
struct pt_fetch *fetch;
struct pt_post_vs *post_vs;
unsigned vertex_data_offset;
unsigned vertex_size;
- unsigned prim;
+ unsigned input_prim;
unsigned opt;
};
-
static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned prim,
unsigned opt,
@@ -61,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned i;
unsigned instance_id_index = ~0;
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
*/
@@ -76,7 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
}
}
- fpme->prim = prim;
+ fpme->input_prim = prim;
fpme->opt = opt;
/* Always leave room for the vertex header whether we need it or
@@ -95,173 +100,177 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)draw->identity_viewport,
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
- (draw->vs.edgeflag_output ? true : false) );
+ (draw->vs.edgeflag_output ? TRUE : FALSE) );
+
+ draw_pt_so_emit_prepare( fpme->so_emit );
if (!(opt & PT_PIPELINE)) {
- draw_pt_emit_prepare( fpme->emit,
- prim,
+ draw_pt_emit_prepare( fpme->emit,
+ gs_out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
- /* return even number */
- *max_vertices = *max_vertices & ~1;
-
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
}
-
-static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
- const unsigned *fetch_elts,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
+static void fetch( struct pt_fetch *fetch,
+ const struct draw_fetch_info *fetch_info,
+ char *output)
{
- struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
- struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( fetch_count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
- assert(0);
- return;
+ if (fetch_info->linear) {
+ draw_pt_fetch_run_linear( fetch,
+ fetch_info->start,
+ fetch_info->count,
+ output );
}
-
- /* Fetch into our vertex buffer
- */
- draw_pt_fetch_run( fpme->fetch,
- fetch_elts,
- fetch_count,
- (char *)pipeline_verts );
-
- /* Run the shader, note that this overwrites the data[] parts of
- * the pipeline verts.
- */
- if (opt & PT_SHADE)
- {
- vshader->run_linear(vshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
- if (gshader)
- draw_geometry_shader_run(gshader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- fetch_count,
- fpme->vertex_size,
- fpme->vertex_size);
+ else {
+ draw_pt_fetch_run( fetch,
+ fetch_info->elts,
+ fetch_info->count,
+ output );
}
+}
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
+static void pipeline(struct fetch_pipeline_middle_end *fpme,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear)
+ draw_pipeline_run_linear( fpme->draw,
+ vert_info,
+ prim_info);
+ else
draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ vert_info,
+ prim_info );
+}
+
+static void emit(struct pt_emit *emit,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear) {
+ draw_pt_emit_linear(emit, vert_info, prim_info);
}
else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ draw_pt_emit(emit, vert_info, prim_info);
}
+}
- FREE(pipeline_verts);
+static void draw_vertex_shader_run(struct draw_vertex_shader *vshader,
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const struct draw_vertex_info *input_verts,
+ struct draw_vertex_info *output_verts )
+{
+ output_verts->vertex_size = input_verts->vertex_size;
+ output_verts->stride = input_verts->vertex_size;
+ output_verts->count = input_verts->count;
+ output_verts->verts =
+ (struct vertex_header *)MALLOC(output_verts->vertex_size *
+ align(output_verts->count, 4));
+
+ vshader->run_linear(vshader,
+ (const float (*)[4])input_verts->verts->data,
+ ( float (*)[4])output_verts->verts->data,
+ constants,
+ const_size,
+ input_verts->count,
+ input_verts->vertex_size,
+ input_verts->vertex_size);
}
-
-static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
- unsigned start,
- unsigned count)
+static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
+ const struct draw_fetch_info *fetch_info,
+ const struct draw_prim_info *prim_info )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vs.vertex_shader;
- struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
+ struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
+ struct draw_prim_info gs_prim_info;
+ struct draw_vertex_info fetched_vert_info;
+ struct draw_vertex_info vs_vert_info;
+ struct draw_vertex_info gs_vert_info;
+ struct draw_vertex_info *vert_info;
unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
+ fetched_vert_info.count = fetch_info->count;
+ fetched_vert_info.vertex_size = fpme->vertex_size;
+ fetched_vert_info.stride = fpme->vertex_size;
+ fetched_vert_info.verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size *
+ align(fetch_info->count, 4));
+ if (!fetched_vert_info.verts) {
assert(0);
return;
}
- /* Fetch into our vertex buffer
+ /* Fetch into our vertex buffer.
+ */
+ fetch( fpme->fetch, fetch_info, (char *)fetched_vert_info.verts );
+
+ /* Finished with fetch:
*/
- draw_pt_fetch_run_linear( fpme->fetch,
- start,
- count,
- (char *)pipeline_verts );
+ fetch_info = NULL;
+ vert_info = &fetched_vert_info;
/* Run the shader, note that this overwrites the data[] parts of
* the pipeline verts.
*/
- if (opt & PT_SHADE)
- {
- shader->run_linear(shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
-
- if (geometry_shader)
- draw_geometry_shader_run(geometry_shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
+ if (fpme->opt & PT_SHADE) {
+ draw_vertex_shader_run(vshader,
+ draw->pt.user.vs_constants,
+ draw->pt.user.vs_constants_size,
+ vert_info,
+ &vs_vert_info);
+
+ FREE(vert_info->verts);
+ vert_info = &vs_vert_info;
+ }
+
+ if ((fpme->opt & PT_SHADE) && gshader) {
+ draw_geometry_shader_run(gshader,
+ draw->pt.user.gs_constants,
+ draw->pt.user.gs_constants_size,
+ vert_info,
+ prim_info,
+ &gs_vert_info,
+ &gs_prim_info);
+
+ FREE(vert_info->verts);
+ vert_info = &gs_vert_info;
+ prim_info = &gs_prim_info;
}
+
+ /* Stream output needs to be done before clipping.
+ *
+ * XXX: Stream output surely needs to respect the prim_info->elt
+ * lists.
+ */
+ draw_pt_so_emit( fpme->so_emit,
+ vert_info,
+ prim_info );
+
if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
+ vert_info ))
{
opt |= PT_PIPELINE;
}
@@ -269,102 +278,102 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
- draw_pipeline_run_linear( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size);
+ pipeline( fpme,
+ vert_info,
+ prim_info );
}
else {
- draw_pt_emit_linear( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fpme->vertex_size,
- count );
+ emit( fpme->emit,
+ vert_info,
+ prim_info );
}
-
- FREE(pipeline_verts);
+ FREE(vert_info->verts);
}
+static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count,
+ unsigned prim_flags )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = FALSE;
+ fetch_info.start = 0;
+ fetch_info.elts = fetch_elts;
+ fetch_info.count = fetch_count;
+
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
+
+ fetch_pipeline_generic( middle, &fetch_info, &prim_info );
+}
-static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
- unsigned start,
- unsigned count,
- const ushort *draw_elts,
- unsigned draw_count )
+static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ unsigned prim_flags)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vs.vertex_shader;
- struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
+
+ prim_info.linear = TRUE;
+ prim_info.start = 0;
+ prim_info.count = count;
+ prim_info.elts = NULL;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &count;
+
+ fetch_pipeline_generic( middle, &fetch_info, &prim_info );
+}
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
- if (!pipeline_verts)
- return FALSE;
- /* Fetch into our vertex buffer
- */
- draw_pt_fetch_run_linear( fpme->fetch,
- start,
- count,
- (char *)pipeline_verts );
+static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count,
+ unsigned prim_flags )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
- /* Run the shader, note that this overwrites the data[] parts of
- * the pipeline verts.
- */
- if (opt & PT_SHADE)
- {
- shader->run_linear(shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.vs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
-
- if (geometry_shader)
- draw_geometry_shader_run(geometry_shader,
- (const float (*)[4])pipeline_verts->data,
- ( float (*)[4])pipeline_verts->data,
- draw->pt.user.gs_constants,
- count,
- fpme->vertex_size,
- fpme->vertex_size);
- }
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
- else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
+ fetch_pipeline_generic( middle, &fetch_info, &prim_info );
- FREE(pipeline_verts);
return TRUE;
}
@@ -385,6 +394,9 @@ static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
if (fpme->emit)
draw_pt_emit_destroy( fpme->emit );
+ if (fpme->so_emit)
+ draw_pt_so_emit_destroy( fpme->so_emit );
+
if (fpme->post_vs)
draw_pt_post_vs_destroy( fpme->post_vs );
@@ -416,7 +428,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
goto fail;
fpme->emit = draw_pt_emit_create( draw );
- if (!fpme->emit)
+ if (!fpme->emit)
+ goto fail;
+
+ fpme->so_emit = draw_pt_so_emit_create( draw );
+ if (!fpme->so_emit)
goto fail;
return &fpme->base;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index d2a492f2b4c..77291e304e1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2010 VMWare, Inc.
+ * Copyright 2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,11 +28,11 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "draw/draw_context.h"
+#include "draw/draw_gs.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
#include "draw/draw_vs.h"
-#include "draw/draw_gs.h"
#include "draw/draw_llvm.h"
@@ -41,53 +41,59 @@ struct llvm_middle_end {
struct draw_context *draw;
struct pt_emit *emit;
+ struct pt_so_emit *so_emit;
struct pt_fetch *fetch;
struct pt_post_vs *post_vs;
unsigned vertex_data_offset;
unsigned vertex_size;
- unsigned prim;
+ unsigned input_prim;
unsigned opt;
struct draw_llvm *llvm;
- struct draw_llvm_variant *variants;
struct draw_llvm_variant *current_variant;
- int nr_variants;
};
static void
llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
- unsigned prim,
+ unsigned in_prim,
unsigned opt,
unsigned *max_vertices )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vs = draw->vs.vertex_shader;
- struct draw_geometry_shader *gs = draw->gs.geometry_shader;
- struct draw_llvm_variant_key key;
+ struct llvm_vertex_shader *shader =
+ llvm_vertex_shader(draw->vs.vertex_shader);
+ char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
+ struct draw_llvm_variant_key *key;
struct draw_llvm_variant *variant = NULL;
+ struct draw_llvm_variant_list_item *li;
unsigned i;
unsigned instance_id_index = ~0;
+
+ unsigned out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ in_prim);
+
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
*/
- unsigned nr = MAX2( vs->info.num_inputs,
- vs->info.num_outputs + 1 );
+ unsigned nr = MAX2( shader->base.info.num_inputs,
+ shader->base.info.num_outputs + 1 );
/* Scan for instanceID system value.
*/
- for (i = 0; i < vs->info.num_inputs; i++) {
- if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ for (i = 0; i < shader->base.info.num_inputs; i++) {
+ if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
instance_id_index = i;
break;
}
}
- fpme->prim = prim;
+ fpme->input_prim = in_prim;
fpme->opt = opt;
/* Always leave room for the vertex header whether we need it or
@@ -97,57 +103,71 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
-
- draw_pt_fetch_prepare( fpme->fetch,
- vs->info.num_inputs,
- fpme->vertex_size,
- instance_id_index );
- if (opt & PT_SHADE) {
- vs->prepare(vs, draw);
- draw_geometry_shader_prepare(gs, draw);
- }
-
-
/* XXX: it's not really gl rasterization rules we care about here,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)(draw->identity_viewport),
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
- (draw->vs.edgeflag_output ? true : false) );
+ (draw->vs.edgeflag_output ? TRUE : FALSE) );
+
+ draw_pt_so_emit_prepare( fpme->so_emit );
if (!(opt & PT_PIPELINE)) {
draw_pt_emit_prepare( fpme->emit,
- prim,
+ out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
/* return even number */
*max_vertices = *max_vertices & ~1;
+
+ key = draw_llvm_make_variant_key(fpme->llvm, store);
- draw_llvm_make_variant_key(fpme->llvm, &key);
-
- variant = fpme->variants;
- while(variant) {
- if(memcmp(&variant->key, &key, sizeof key) == 0)
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
+ variant = li->base;
break;
+ }
+ li = next_elem(li);
+ }
- variant = variant->next;
+ if (variant) {
+ move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global);
}
+ else {
+ unsigned i;
+ if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ /*
+ * XXX: should we flush here ?
+ */
+ for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+ struct draw_llvm_variant_list_item *item =
+ last_elem(&fpme->llvm->vs_variants_list);
+ draw_llvm_destroy_variant(item->base);
+ }
+ }
+
+ variant = draw_llvm_create_variant(fpme->llvm, nr, key);
- if (!variant) {
- variant = draw_llvm_prepare(fpme->llvm, nr);
- variant->next = fpme->variants;
- fpme->variants = variant;
- ++fpme->nr_variants;
+ if (variant) {
+ insert_at_head(&shader->variants, &variant->list_item_local);
+ insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global);
+ fpme->llvm->nr_variants++;
+ shader->variants_cached++;
+ }
}
+
fpme->current_variant = variant;
/*XXX we only support one constant buffer */
@@ -158,125 +178,174 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
}
+static void pipeline(struct llvm_middle_end *llvm,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear)
+ draw_pipeline_run_linear( llvm->draw,
+ vert_info,
+ prim_info);
+ else
+ draw_pipeline_run( llvm->draw,
+ vert_info,
+ prim_info );
+}
+
+static void emit(struct pt_emit *emit,
+ const struct draw_vertex_info *vert_info,
+ const struct draw_prim_info *prim_info)
+{
+ if (prim_info->linear) {
+ draw_pt_emit_linear(emit, vert_info, prim_info);
+ }
+ else {
+ draw_pt_emit(emit, vert_info, prim_info);
+ }
+}
-static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
- const unsigned *fetch_elts,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
+static void
+llvm_pipeline_generic( struct draw_pt_middle_end *middle,
+ const struct draw_fetch_info *fetch_info,
+ const struct draw_prim_info *prim_info )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
+ struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
+ struct draw_prim_info gs_prim_info;
+ struct draw_vertex_info llvm_vert_info;
+ struct draw_vertex_info gs_vert_info;
+ struct draw_vertex_info *vert_info;
unsigned opt = fpme->opt;
- unsigned alloc_count = align( fetch_count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
+ llvm_vert_info.count = fetch_info->count;
+ llvm_vert_info.vertex_size = fpme->vertex_size;
+ llvm_vert_info.stride = fpme->vertex_size;
+ llvm_vert_info.verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size *
+ align(fetch_info->count, 4));
+ if (!llvm_vert_info.verts) {
assert(0);
return;
}
- fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
- pipeline_verts,
- (const char **)draw->pt.user.vbuffer,
- fetch_elts,
- fetch_count,
- fpme->vertex_size,
- draw->pt.vertex_buffer );
-
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size ))
- {
+ if (fetch_info->linear)
+ fpme->current_variant->jit_func( &fpme->llvm->jit_context,
+ llvm_vert_info.verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_info->start,
+ fetch_info->count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer,
+ draw->instance_id);
+ else
+ fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
+ llvm_vert_info.verts,
+ (const char **)draw->pt.user.vbuffer,
+ fetch_info->elts,
+ fetch_info->count,
+ fpme->vertex_size,
+ draw->pt.vertex_buffer,
+ draw->instance_id);
+
+ /* Finished with fetch and vs:
+ */
+ fetch_info = NULL;
+ vert_info = &llvm_vert_info;
+
+
+ if ((opt & PT_SHADE) && gshader) {
+ draw_geometry_shader_run(gshader,
+ draw->pt.user.gs_constants,
+ draw->pt.user.gs_constants_size,
+ vert_info,
+ prim_info,
+ &gs_vert_info,
+ &gs_prim_info);
+
+ FREE(vert_info->verts);
+ vert_info = &gs_vert_info;
+ prim_info = &gs_prim_info;
+ }
+
+ /* stream output needs to be done before clipping */
+ draw_pt_so_emit( fpme->so_emit,
+ vert_info,
+ prim_info );
+
+ if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) {
opt |= PT_PIPELINE;
}
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ pipeline( fpme,
+ vert_info,
+ prim_info );
}
else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ emit( fpme->emit,
+ vert_info,
+ prim_info );
}
+ FREE(vert_info->verts);
+}
- FREE(pipeline_verts);
+static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count,
+ unsigned prim_flags )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = FALSE;
+ fetch_info.start = 0;
+ fetch_info.elts = fetch_elts;
+ fetch_info.count = fetch_count;
+
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
+
+ llvm_pipeline_generic( middle, &fetch_info, &prim_info );
}
static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts) {
- /* Not much we can do here - just skip the rendering.
- */
- assert(0);
- return;
- }
-
-#if 0
- debug_printf("#### Pipeline = %p (data = %p)\n",
- pipeline_verts, pipeline_verts->data);
-#endif
- fpme->current_variant->jit_func( &fpme->llvm->jit_context,
- pipeline_verts,
- (const char **)draw->pt.user.vbuffer,
- start,
- count,
- fpme->vertex_size,
- draw->pt.vertex_buffer );
-
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
-
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
- draw_pipeline_run_linear( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size);
- }
- else {
- draw_pt_emit_linear( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- fpme->vertex_size,
- count );
- }
-
- FREE(pipeline_verts);
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
+
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
+
+ prim_info.linear = TRUE;
+ prim_info.start = 0;
+ prim_info.count = count;
+ prim_info.elts = NULL;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &count;
+
+ llvm_pipeline_generic( middle, &fetch_info, &prim_info );
}
@@ -286,56 +355,29 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- unsigned opt = fpme->opt;
- unsigned alloc_count = align( count, 4 );
-
- struct vertex_header *pipeline_verts =
- (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
-
- if (!pipeline_verts)
- return FALSE;
-
- fpme->current_variant->jit_func( &fpme->llvm->jit_context,
- pipeline_verts,
- (const char **)draw->pt.user.vbuffer,
- start,
- count,
- fpme->vertex_size,
- draw->pt.vertex_buffer );
-
- if (draw_pt_post_vs_run( fpme->post_vs,
- pipeline_verts,
- count,
- fpme->vertex_size ))
- {
- opt |= PT_PIPELINE;
- }
+ struct draw_fetch_info fetch_info;
+ struct draw_prim_info prim_info;
- /* Do we need to run the pipeline?
- */
- if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
- else {
- draw_pt_emit( fpme->emit,
- (const float (*)[4])pipeline_verts->data,
- count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
- }
+ fetch_info.linear = TRUE;
+ fetch_info.start = start;
+ fetch_info.count = count;
+ fetch_info.elts = NULL;
+
+ prim_info.linear = FALSE;
+ prim_info.start = 0;
+ prim_info.count = draw_count;
+ prim_info.elts = draw_elts;
+ prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
+ prim_info.primitive_count = 1;
+ prim_info.primitive_lengths = &draw_count;
+
+ llvm_pipeline_generic( middle, &fetch_info, &prim_info );
- FREE(pipeline_verts);
return TRUE;
}
@@ -356,17 +398,18 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
if (fpme->emit)
draw_pt_emit_destroy( fpme->emit );
+ if (fpme->so_emit)
+ draw_pt_so_emit_destroy( fpme->so_emit );
+
if (fpme->post_vs)
draw_pt_post_vs_destroy( fpme->post_vs );
- if (fpme->llvm)
- draw_llvm_destroy( fpme->llvm );
-
FREE(middle);
}
-struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
+struct draw_pt_middle_end *
+draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
{
struct llvm_middle_end *fpme = 0;
@@ -398,13 +441,15 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont
if (!fpme->emit)
goto fail;
- fpme->llvm = draw_llvm_create(draw);
+ fpme->so_emit = draw_pt_so_emit_create( draw );
+ if (!fpme->so_emit)
+ goto fail;
+
+ fpme->llvm = draw->llvm;
if (!fpme->llvm)
goto fail;
- fpme->variants = NULL;
fpme->current_variant = NULL;
- fpme->nr_variants = 0;
return &fpme->base;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 5525dfc748d..769409cfd67 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -26,22 +26,38 @@
**************************************************************************/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_context.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
-#include "draw/draw_vbuf.h"
#include "draw/draw_pt.h"
+
+#define DO_CLIP_XY 0x1
+#define DO_CLIP_FULL_Z 0x2
+#define DO_CLIP_HALF_Z 0x4
+#define DO_CLIP_USER 0x8
+#define DO_VIEWPORT 0x10
+#define DO_EDGEFLAG 0x20
+
+
struct pt_post_vs {
struct draw_context *draw;
+ unsigned flags;
+
boolean (*run)( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride );
+ struct draw_vertex_info *info );
};
-
+static INLINE void
+initialize_vertex_header(struct vertex_header *header)
+{
+ header->clipmask = 0;
+ header->edgeflag = 1;
+ header->pad = 0;
+ header->vertex_id = UNDEFINED_VERTEX_ID;
+}
static INLINE float
dot4(const float *a, const float *b)
@@ -52,215 +68,121 @@ dot4(const float *a, const float *b)
a[3]*b[3]);
}
+#define FLAGS (0)
+#define TAG(x) x##_none
+#include "draw_cliptest_tmp.h"
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_viewport
+#include "draw_cliptest_tmp.h"
-static INLINE unsigned
-compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0x0;
- unsigned i;
-
-#if 0
- debug_printf("compute clipmask %f %f %f %f\n",
- clip[0], clip[1], clip[2], clip[3]);
- assert(clip[3] != 0.0);
-#endif
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= (1<<0);
- if ( clip[0] + clip[3] < 0) mask |= (1<<1);
- if (-clip[1] + clip[3] < 0) mask |= (1<<2);
- if ( clip[1] + clip[3] < 0) mask |= (1<<3);
- if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
- if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
-
-/* The normal case - cliptest, rhw divide, viewport transform.
- *
- * Also handle identity viewport here at the expense of a few wasted
- * instructions
- */
-static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- struct vertex_header *out = vertices;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned clipped = 0;
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
-
- for (j = 0; j < count; j++) {
- float *position = out->data[pos];
-
-#if 0
- debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
- j, out, position, position[0], position[1], position[2], position[3]);
-#endif
-
- out->clip[0] = position[0];
- out->clip[1] = position[1];
- out->clip[2] = position[2];
- out->clip[3] = position[3];
-
- out->vertex_id = 0xffff;
- out->clipmask = compute_clipmask_gl(out->clip,
- pvs->draw->plane,
- pvs->draw->nr_planes);
- clipped += out->clipmask;
-
- if (out->clipmask == 0)
- {
- /* divide by w */
- float w = 1.0f / position[3];
-
- /* Viewport mapping */
- position[0] = position[0] * w * scale[0] + trans[0];
- position[1] = position[1] * w * scale[1] + trans[1];
- position[2] = position[2] * w * scale[2] + trans[2];
- position[3] = w;
-#if 0
- debug_printf("post viewport: %f %f %f %f\n",
- position[0],
- position[1],
- position[2],
- position[3]);
-#endif
- }
-
- out = (struct vertex_header *)( (char *)out + stride );
- }
-
- return clipped != 0;
-}
-
-
+#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_halfz_viewport
+#include "draw_cliptest_tmp.h"
-/* As above plus edgeflags
- */
-static boolean
-post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- unsigned j;
- boolean needpipe;
+#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_fullz_viewport
+#include "draw_cliptest_tmp.h"
- needpipe = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride);
+#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_halfz_viewport
+#include "draw_cliptest_tmp.h"
- /* If present, copy edgeflag VS output into vertex header.
- * Otherwise, leave header as is.
- */
- if (pvs->draw->vs.edgeflag_output) {
- struct vertex_header *out = vertices;
- int ef = pvs->draw->vs.edgeflag_output;
-
- for (j = 0; j < count; j++) {
- const float *edgeflag = out->data[ef];
- out->edgeflag = !(edgeflag[0] != 1.0f);
- needpipe |= !out->edgeflag;
- out = (struct vertex_header *)( (char *)out + stride );
- }
- }
- return needpipe;
-}
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_user_viewport
+#include "draw_cliptest_tmp.h"
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG)
+#define TAG(x) x##_xy_fullz_user_viewport_edgeflag
+#include "draw_cliptest_tmp.h"
-/* If bypass_clipping is set, skip cliptest and rhw divide.
+/* Don't want to create 64 versions of this function, so catch the
+ * less common ones here. This is looking like something which should
+ * be code-generated, perhaps appended to the end of the vertex
+ * shader.
*/
-static boolean post_vs_viewport( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- struct vertex_header *out = vertices;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
- for (j = 0; j < count; j++) {
- float *position = out->data[pos];
-
- /* Viewport mapping only, no cliptest/rhw divide
- */
- position[0] = position[0] * scale[0] + trans[0];
- position[1] = position[1] * scale[1] + trans[1];
- position[2] = position[2] * scale[2] + trans[2];
-
- out = (struct vertex_header *)((char *)out + stride);
- }
-
- return FALSE;
-}
+#define FLAGS (pvs->flags)
+#define TAG(x) x##_generic
+#include "draw_cliptest_tmp.h"
-/* If bypass_clipping is set and we have an identity viewport, nothing
- * to do.
- */
-static boolean post_vs_none( struct pt_post_vs *pvs,
- struct vertex_header *vertices,
- unsigned count,
- unsigned stride )
-{
- if (0) debug_printf("%s\n", __FUNCTION__);
- return FALSE;
-}
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
- struct vertex_header *pipeline_verts,
- unsigned count,
- unsigned stride )
+ struct draw_vertex_info *info )
{
- return pvs->run( pvs, pipeline_verts, count, stride );
+ return pvs->run( pvs, info );
}
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags )
{
- if (!need_edgeflags) {
- if (bypass_clipping) {
- if (bypass_viewport)
- pvs->run = post_vs_none;
- else
- pvs->run = post_vs_viewport;
- }
- else {
- /* if (opengl) */
- pvs->run = post_vs_cliptest_viewport_gl;
- }
+ pvs->flags = 0;
+
+ if (clip_xy)
+ pvs->flags |= DO_CLIP_XY;
+
+ if (clip_z && opengl) {
+ pvs->flags |= DO_CLIP_FULL_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 1 );
}
- else {
- /* If we need to copy edgeflags to the vertex header, it should
- * mean we're running the primitive pipeline. Hence the bypass
- * flags should be false.
- */
- assert(!bypass_clipping);
- assert(!bypass_viewport);
- pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
+
+ if (clip_z && !opengl) {
+ pvs->flags |= DO_CLIP_HALF_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 0 );
+ }
+
+ if (clip_user)
+ pvs->flags |= DO_CLIP_USER;
+
+ if (!bypass_viewport)
+ pvs->flags |= DO_VIEWPORT;
+
+ if (need_edgeflags)
+ pvs->flags |= DO_EDGEFLAG;
+
+ /* Now select the relevant function:
+ */
+ switch (pvs->flags) {
+ case 0:
+ pvs->run = do_cliptest_none;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_halfz_viewport;
+ break;
+
+ case DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_fullz_viewport;
+ break;
+
+ case DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_halfz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_user_viewport;
+ break;
+
+ case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER |
+ DO_VIEWPORT | DO_EDGEFLAG):
+ pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag;
+ break;
+
+ default:
+ pvs->run = do_cliptest_generic;
+ break;
}
}
@@ -272,7 +194,7 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
return NULL;
pvs->draw = draw;
-
+
return pvs;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
new file mode 100644
index 00000000000..c86bdd99a33
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -0,0 +1,293 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+struct pt_so_emit {
+ struct draw_context *draw;
+
+ void *buffers[PIPE_MAX_SO_BUFFERS];
+
+ unsigned input_vertex_stride;
+ const float (*inputs)[4];
+
+ boolean has_so;
+
+ boolean single_buffer;
+
+ unsigned emitted_primitives;
+ unsigned emitted_vertices;
+};
+
+
+void draw_pt_so_emit_prepare(struct pt_so_emit *emit)
+{
+ struct draw_context *draw = emit->draw;
+
+ emit->has_so = (draw->so.state.num_outputs > 0);
+
+ /* if we have a state with outputs make sure we have
+ * buffers to output to */
+ if (emit->has_so) {
+ boolean has_valid_buffer = FALSE;
+ unsigned i;
+ for (i = 0; i < draw->so.num_buffers; ++i) {
+ if (draw->so.buffers[i]) {
+ has_valid_buffer = TRUE;
+ break;
+ }
+ }
+ emit->has_so = has_valid_buffer;
+ }
+
+ if (!emit->has_so)
+ return;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+}
+
+static boolean
+is_component_writable(unsigned mask,
+ unsigned compo)
+{
+ switch (mask) {
+ case TGSI_WRITEMASK_NONE:
+ return FALSE;
+ case TGSI_WRITEMASK_X:
+ return compo == 0;
+ case TGSI_WRITEMASK_Y:
+ return compo == 1;
+ case TGSI_WRITEMASK_XY:
+ return compo == 0 || compo == 1;
+ case TGSI_WRITEMASK_Z:
+ return compo == 2;
+ case TGSI_WRITEMASK_XZ:
+ return compo == 0 || compo == 2;
+ case TGSI_WRITEMASK_YZ:
+ return compo == 1 || compo == 2;
+ case TGSI_WRITEMASK_XYZ:
+ return compo == 0 || compo == 1 || compo == 2;
+ case TGSI_WRITEMASK_W:
+ return compo == 3;
+ case TGSI_WRITEMASK_XW:
+ return compo == 0 || compo == 3;
+ case TGSI_WRITEMASK_YW:
+ return compo == 1 || compo == 3;
+ case TGSI_WRITEMASK_XYW:
+ return compo == 0 || compo == 1 || compo == 3;
+ case TGSI_WRITEMASK_ZW:
+ return compo == 2 || compo == 3;
+ case TGSI_WRITEMASK_XZW:
+ return compo == 0 || compo == 1 || compo == 3;
+ case TGSI_WRITEMASK_YZW:
+ return compo == 1 || compo == 2 || compo == 4;
+ case TGSI_WRITEMASK_XYZW:
+ return compo < 4;
+ default:
+ debug_assert(!"Unknown writemask in stream out");
+ return compo < 4;
+ }
+}
+
+static void so_emit_prim(struct pt_so_emit *so,
+ unsigned *indices,
+ unsigned num_vertices)
+{
+ unsigned slot, i;
+ unsigned input_vertex_stride = so->input_vertex_stride;
+ struct draw_context *draw = so->draw;
+ const float (*input_ptr)[4];
+ const struct pipe_stream_output_state *state =
+ &draw->so.state;
+ float **buffer = 0;
+
+ input_ptr = so->inputs;
+
+ for (i = 0; i < num_vertices; ++i) {
+ const float (*input)[4];
+ unsigned total_written_compos = 0;
+ /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/
+ input = (const float (*)[4])(
+ (const char *)input_ptr + (indices[i] * input_vertex_stride));
+ for (slot = 0; slot < state->num_outputs; ++slot) {
+ unsigned idx = state->register_index[slot];
+ unsigned writemask = state->register_mask[slot];
+ unsigned written_compos = 0;
+ unsigned compo;
+
+ buffer = (float**)&so->buffers[state->output_buffer[slot]];
+
+ /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+ slot, vs_slot, idx);*/
+#if 1
+ assert(!util_is_inf_or_nan(input[idx][0]));
+ assert(!util_is_inf_or_nan(input[idx][1]));
+ assert(!util_is_inf_or_nan(input[idx][2]));
+ assert(!util_is_inf_or_nan(input[idx][3]));
+#endif
+ for (compo = 0; compo < 4; ++compo) {
+ if (is_component_writable(writemask, compo)) {
+ float *buf = *buffer;
+ buf[written_compos++] = input[idx][compo];
+ }
+ }
+#if 0
+ debug_printf("\t\t(writemask = %d)%f %f %f %f\n",
+ writemask,
+ input[idx][0],
+ input[idx][1],
+ input[idx][2],
+ input[idx][3]);
+#endif
+ *buffer += written_compos;
+ total_written_compos += written_compos;
+ }
+ if (so->single_buffer) {
+ int stride = (int)state->stride -
+ sizeof(float) * total_written_compos;
+
+ debug_assert(stride >= 0);
+ *buffer = (float*) (((char*)*buffer) + stride);
+ }
+ }
+ so->emitted_vertices += num_vertices;
+ ++so->emitted_primitives;
+}
+
+static void so_point(struct pt_so_emit *so, int idx)
+{
+ unsigned indices[1];
+
+ indices[0] = idx;
+
+ so_emit_prim(so, indices, 1);
+}
+
+static void so_line(struct pt_so_emit *so, int i0, int i1)
+{
+ unsigned indices[2];
+
+ indices[0] = i0;
+ indices[1] = i1;
+
+ so_emit_prim(so, indices, 2);
+}
+
+static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
+{
+ unsigned indices[3];
+
+ indices[0] = i0;
+ indices[1] = i1;
+ indices[2] = i2;
+
+ so_emit_prim(so, indices, 3);
+}
+
+
+#define FUNC so_run_linear
+#define GET_ELT(idx) (start + (idx))
+#include "draw_so_emit_tmp.h"
+
+
+#define FUNC so_run_elts
+#define LOCAL_VARS const ushort *elts = input_prims->elts;
+#define GET_ELT(idx) (elts[start + (idx)])
+#include "draw_so_emit_tmp.h"
+
+
+void draw_pt_so_emit( struct pt_so_emit *emit,
+ const struct draw_vertex_info *input_verts,
+ const struct draw_prim_info *input_prims )
+{
+ struct draw_context *draw = emit->draw;
+ struct vbuf_render *render = draw->render;
+ unsigned start, i;
+
+ if (!emit->has_so)
+ return;
+
+ emit->emitted_vertices = 0;
+ emit->emitted_primitives = 0;
+ emit->input_vertex_stride = input_verts->stride;
+ emit->inputs = (const float (*)[4])input_verts->verts->data;
+ for (i = 0; i < draw->so.num_buffers; ++i) {
+ emit->buffers[i] = draw->so.buffers[i];
+ }
+ emit->single_buffer = TRUE;
+ for (i = 0; i < draw->so.state.num_outputs; ++i) {
+ if (draw->so.state.output_buffer[i] != 0)
+ emit->single_buffer = FALSE;
+ }
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ for (start = i = 0; i < input_prims->primitive_count;
+ start += input_prims->primitive_lengths[i], i++)
+ {
+ unsigned count = input_prims->primitive_lengths[i];
+
+ if (input_prims->linear) {
+ so_run_linear(emit, input_prims, input_verts,
+ start, count);
+ } else {
+ so_run_elts(emit, input_prims, input_verts,
+ start, count);
+ }
+ }
+
+ render->set_stream_output_info(render,
+ emit->emitted_primitives,
+ emit->emitted_vertices);
+}
+
+
+struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw )
+{
+ struct pt_so_emit *emit = CALLOC_STRUCT(pt_so_emit);
+ if (!emit)
+ return NULL;
+
+ emit->draw = draw;
+
+ return emit;
+}
+
+void draw_pt_so_emit_destroy( struct pt_so_emit *emit )
+{
+ FREE(emit);
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 3236d38e6ab..513bbbed216 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_LINES_ADJACENCY:
*first = 4;
- *incr = 2;
+ *incr = 4;
break;
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
*first = 4;
@@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLES_ADJACENCY:
*first = 6;
- *incr = 3;
+ *incr = 6;
break;
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_TRIANGLE_FAN:
@@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
*first = 6;
- *incr = 1;
+ *incr = 2;
break;
case PIPE_PRIM_QUADS:
*first = 4;
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
+
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
+{
+ if (count < first)
+ return 0;
+ return count - (count - first) % incr;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
deleted file mode 100644
index d0e16c9bc3c..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-#define FETCH_MAX 256
-#define DRAW_MAX (FETCH_MAX+8)
-
-struct varray_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned driver_fetch_max;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-};
-
-
-static void varray_flush_linear(struct varray_frontend *varray,
- unsigned start, unsigned count)
-{
- if (count) {
- assert(varray->middle->run_linear);
- varray->middle->run_linear(varray->middle, start, count);
- }
-}
-
-static void varray_line_loop_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count,
- boolean end )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 1) {
- unsigned nr = 0, i;
-
- for (i = 0; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- if (end)
- varray->fetch_elts[nr++] = start;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-static void varray_fan_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 2) {
- unsigned nr = 0, i;
-
- if (segment_start != 0)
- varray->fetch_elts[nr++] = start;
-
- for (i = 0 ; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-
-#define FUNC varray_run
-#include "draw_pt_varray_tmp_linear.h"
-
-static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
- PIPE_PRIM_LINE_STRIP,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLE_STRIP,
- PIPE_PRIM_TRIANGLE_FAN,
- PIPE_PRIM_QUADS,
- PIPE_PRIM_QUAD_STRIP,
- PIPE_PRIM_POLYGON
-};
-
-
-
-static void varray_prepare(struct draw_pt_front_end *frontend,
- unsigned prim,
- struct draw_pt_middle_end *middle,
- unsigned opt)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
- varray->base.run = varray_run;
-
- varray->input_prim = prim;
- varray->output_prim = decompose_prim[prim];
-
- varray->middle = middle;
- middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max );
-
- /* check that the max is even */
- assert((varray->driver_fetch_max & 1) == 0);
-
- varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
-}
-
-
-
-
-static void varray_finish(struct draw_pt_front_end *frontend)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- varray->middle->finish(varray->middle);
- varray->middle = NULL;
-}
-
-static void varray_destroy(struct draw_pt_front_end *frontend)
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
-{
- ushort i;
- struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
- if (varray == NULL)
- return NULL;
-
- varray->base.prepare = varray_prepare;
- varray->base.run = NULL;
- varray->base.finish = varray_finish;
- varray->base.destroy = varray_destroy;
- varray->draw = draw;
-
- for (i = 0; i < DRAW_MAX; i++) {
- varray->draw_elts[i] = i;
- }
-
- return &varray->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
deleted file mode 100644
index 7c722457c3c..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ /dev/null
@@ -1,238 +0,0 @@
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- struct draw_context *draw = varray->draw;
- unsigned start = (unsigned)elts;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j;
- ushort flags;
- unsigned first, incr;
-
- varray->fetch_start = start;
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i < end; i++) {
- POINT(varray, i + 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+1 < end; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- LINE(varray, flags, i - 1, 0);
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- else {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i + 2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- }
- break;
-
- case PIPE_PRIM_QUADS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- varray_flush(varray);
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
deleted file mode 100644
index a292346be95..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ /dev/null
@@ -1,98 +0,0 @@
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- return count - (count - first) % incr;
-}
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- unsigned start = (unsigned) ((char *) elts - (char *) NULL);
-
- unsigned j;
- unsigned first, incr;
-
- assert(elt_bias == 0);
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
- /* Sanitize primitive length:
- */
- count = trim(count, first, incr);
- if (count < first)
- return;
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
- varray_flush_linear(varray, start + j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- /* Always have to decompose as we've stated that this will be
- * emitted as a line-strip.
- */
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_line_loop_segment(varray, start, j, nr, nr == remaining);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
-
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count < varray->driver_fetch_max) {
- varray_flush_linear(varray, start, count);
- }
- else {
- for ( j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_fan_segment(varray, start, j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
deleted file mode 100644
index 37ffbac4f92..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
-#include "util/u_memory.h"
-#include "util/u_prim.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-
-#define CACHE_MAX 256
-#define FETCH_MAX 256
-#define DRAW_MAX (16*1024)
-
-struct vcache_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- unsigned in[CACHE_MAX];
- ushort out[CACHE_MAX];
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned draw_count;
- unsigned fetch_count;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-
- unsigned middle_prim;
- unsigned opt;
-};
-
-static INLINE void
-vcache_flush( struct vcache_frontend *vcache )
-{
- if (vcache->middle_prim != vcache->output_prim) {
- vcache->middle_prim = vcache->output_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- if (vcache->draw_count) {
- vcache->middle->run( vcache->middle,
- vcache->fetch_elts,
- vcache->fetch_count,
- vcache->draw_elts,
- vcache->draw_count );
- }
-
- memset(vcache->in, ~0, sizeof(vcache->in));
- vcache->fetch_count = 0;
- vcache->draw_count = 0;
-}
-
-static INLINE void
-vcache_check_flush( struct vcache_frontend *vcache )
-{
- if ( vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 4 >= FETCH_MAX )
- {
- vcache_flush( vcache );
- }
-}
-
-
-static INLINE void
-vcache_elt( struct vcache_frontend *vcache,
- unsigned felt,
- ushort flags )
-{
- unsigned idx = felt % CACHE_MAX;
-
- if (vcache->in[idx] != felt) {
- assert(vcache->fetch_count < FETCH_MAX);
-
- vcache->in[idx] = felt;
- vcache->out[idx] = (ushort)vcache->fetch_count;
- vcache->fetch_elts[vcache->fetch_count++] = felt;
- }
-
- vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
-}
-
-
-
-static INLINE void
-vcache_triangle( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-static INLINE void
-vcache_line( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_point( struct vcache_frontend *vcache,
- unsigned i0 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_check_flush(vcache);
-}
-
-static INLINE void
-vcache_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- vcache_triangle( vcache, i0, i1, i3 );
- vcache_triangle( vcache, i1, i2, i3 );
-}
-
-static INLINE void
-vcache_ef_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- if (vcache->draw->rasterizer->flatshade_first) {
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i0, i1, i2 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_2 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i0, i2, i3 );
- }
- else {
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_2 ),
- i0, i1, i3 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i1, i2, i3 );
- }
-}
-
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
-#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
-
-static INLINE void
-rebase_uint_elts( const unsigned *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-static INLINE void
-rebase_ushort_elts( const ushort *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-static INLINE void
-rebase_ubyte_elts( const ubyte *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-
-static INLINE void
-translate_uint_elts( const unsigned *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-static INLINE void
-translate_ushort_elts( const ushort *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-static INLINE void
-translate_ubyte_elts( const ubyte *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
-
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-
-
-#if 0
-static INLINE enum pipe_format
-format_from_get_elt( pt_elt_func get_elt )
-{
- switch (draw->pt.user.eltSize) {
- case 1: return PIPE_FORMAT_R8_UNORM;
- case 2: return PIPE_FORMAT_R16_UNORM;
- case 4: return PIPE_FORMAT_R32_UNORM;
- default: return PIPE_FORMAT_NONE;
- }
-}
-#endif
-
-static INLINE void
-vcache_check_run( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned draw_count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
- unsigned min_index = draw->pt.user.min_index;
- unsigned max_index = draw->pt.user.max_index;
- unsigned index_size = draw->pt.user.eltSize;
- unsigned fetch_count = max_index + 1 - min_index;
- const ushort *transformed_elts;
- ushort *storage = NULL;
- boolean ok = FALSE;
-
-
- if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
- vcache->fetch_max,
- draw_count);
-
- if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
- fetch_count >= UNDEFINED_VERTEX_ID ||
- fetch_count > draw_count) {
- if (0) debug_printf("fail\n");
- goto fail;
- }
-
- if (vcache->middle_prim != vcache->input_prim) {
- vcache->middle_prim = vcache->input_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
-
- assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
- (elt_bias < 0 && min_index + elt_bias < min_index));
-
- if (min_index == 0 &&
- index_size == 2)
- {
- transformed_elts = (const ushort *)elts;
- }
- else
- {
- storage = MALLOC( draw_count * sizeof(ushort) );
- if (!storage)
- goto fail;
-
- if (min_index == 0) {
- switch(index_size) {
- case 1:
- translate_ubyte_elts( (const ubyte *)elts,
- draw_count,
- storage );
- break;
-
- case 2:
- translate_ushort_elts( (const ushort *)elts,
- draw_count,
- storage );
- break;
-
- case 4:
- translate_uint_elts( (const uint *)elts,
- draw_count,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- else {
- switch(index_size) {
- case 1:
- rebase_ubyte_elts( (const ubyte *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 2:
- rebase_ushort_elts( (const ushort *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 4:
- rebase_uint_elts( (const uint *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- transformed_elts = storage;
- }
-
- if (fetch_count < UNDEFINED_VERTEX_ID)
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index + elt_bias, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
-
- FREE(storage);
-
- if (ok)
- return;
-
- debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
- fetch_count, draw_count);
-
- fail:
- vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
-}
-
-
-
-
-static void
-vcache_prepare( struct draw_pt_front_end *frontend,
- unsigned prim,
- struct draw_pt_middle_end *middle,
- unsigned opt )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-
- if (opt & PT_PIPELINE)
- {
- vcache->base.run = vcache_run_extras;
- }
- else
- {
- vcache->base.run = vcache_check_run;
- }
-
- vcache->input_prim = prim;
- vcache->output_prim = u_reduced_prim(prim);
-
- vcache->middle = middle;
- vcache->opt = opt;
-
- /* Have to run prepare here, but try and guess a good prim for
- * doing so:
- */
- vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
- middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max );
-}
-
-
-
-
-static void
-vcache_finish( struct draw_pt_front_end *frontend )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- vcache->middle->finish( vcache->middle );
- vcache->middle = NULL;
-}
-
-static void
-vcache_destroy( struct draw_pt_front_end *frontend )
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
-{
- struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
- if (vcache == NULL)
- return NULL;
-
- vcache->base.prepare = vcache_prepare;
- vcache->base.run = NULL;
- vcache->base.finish = vcache_finish;
- vcache->base.destroy = vcache_destroy;
- vcache->draw = draw;
-
- memset(vcache->in, ~0, sizeof(vcache->in));
-
- return &vcache->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
deleted file mode 100644
index f7a63de3ba9..00000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ /dev/null
@@ -1,197 +0,0 @@
-
-
-static void FUNC( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i;
- ushort flags;
-
- if (0) debug_printf("%s %d\n", __FUNCTION__, count);
-
-
- switch (vcache->input_prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT( vcache,
- get_elt(elts, i + 0) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE( vcache,
- DRAW_PIPE_RESET_STIPPLE,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (i = 1; i < count; i++, flags = 0) {
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, i ) + elt_bias);
- }
-
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, 0 ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE( vcache,
- flags,
- get_elt(elts, i - 1) + elt_bias,
- get_elt(elts, i ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1 + (i&1)) + elt_bias,
- get_elt(elts, i + 2 - (i&1)) + elt_bias);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 0 + (i&1)) + elt_bias,
- get_elt(elts, i + 1 - (i&1)) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, 0 ) + elt_bias);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE( vcache,
- DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- get_elt(elts, 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2 ) + elt_bias);
- }
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD( vcache,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, i + 3) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD( vcache,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, i + 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 3) + elt_bias );
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
- ushort edge_next, edge_finish;
-
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | edge_middle | edge_last;
- edge_next = edge_last;
- edge_finish = edge_first;
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- edge_next = edge_middle;
- edge_finish = edge_last;
- }
-
- for (i = 0; i+2 < count; i++, flags = edge_next) {
-
- if (i + 3 == count)
- flags |= edge_finish;
-
- if (flatfirst) {
- TRIANGLE( vcache,
- flags,
- get_elt(elts, 0) + elt_bias,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias );
- }
- else {
- TRIANGLE( vcache,
- flags,
- get_elt(elts, i + 1) + elt_bias,
- get_elt(elts, i + 2) + elt_bias,
- get_elt(elts, 0) + elt_bias);
- }
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- vcache_flush( vcache );
-}
-
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
new file mode 100644
index 00000000000..a6875253094
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -0,0 +1,208 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024
+#define MAP_SIZE 256
+
+struct vsplit_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ unsigned prim;
+
+ struct draw_pt_middle_end *middle;
+
+ unsigned max_vertices;
+ ushort segment_size;
+
+ /* buffers for splitting */
+ unsigned fetch_elts[SEGMENT_SIZE];
+ ushort draw_elts[SEGMENT_SIZE];
+ ushort identity_draw_elts[SEGMENT_SIZE];
+
+ struct {
+ /* map a fetch element to a draw element */
+ unsigned fetches[MAP_SIZE];
+ ushort draws[MAP_SIZE];
+ boolean has_max_fetch;
+
+ ushort num_fetch_elts;
+ ushort num_draw_elts;
+ } cache;
+};
+
+
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{
+ memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
+ vsplit->cache.has_max_fetch = FALSE;
+ vsplit->cache.num_fetch_elts = 0;
+ vsplit->cache.num_draw_elts = 0;
+}
+
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{
+ vsplit->middle->run(vsplit->middle,
+ vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+ vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ unsigned hash = fetch % MAP_SIZE;
+
+ if (vsplit->cache.fetches[hash] != fetch) {
+ /* update cache */
+ vsplit->cache.fetches[hash] = fetch;
+ vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+ /* add fetch */
+ assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+ vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+ }
+
+ vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements. The fetch element is
+ * in full range (uint).
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ /* special care for 0xffffffff */
+ if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+ unsigned hash = fetch % MAP_SIZE;
+ vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+ vsplit->cache.has_max_fetch = TRUE;
+ }
+
+ vsplit_add_cache(vsplit, fetch);
+}
+
+
+#define FUNC vsplit_run_linear
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+ unsigned in_prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+ switch (vsplit->draw->pt.user.eltSize) {
+ case 0:
+ vsplit->base.run = vsplit_run_linear;
+ break;
+ case 1:
+ vsplit->base.run = vsplit_run_ubyte;
+ break;
+ case 2:
+ vsplit->base.run = vsplit_run_ushort;
+ break;
+ case 4:
+ vsplit->base.run = vsplit_run_uint;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* split only */
+ vsplit->prim = in_prim;
+
+ vsplit->middle = middle;
+ middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices);
+
+ vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices);
+}
+
+
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+ vsplit->middle->finish(vsplit->middle);
+ vsplit->middle = NULL;
+}
+
+
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{
+ FREE(frontend);
+}
+
+
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{
+ struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+ ushort i;
+
+ if (!vsplit)
+ return NULL;
+
+ vsplit->base.prepare = vsplit_prepare;
+ vsplit->base.run = NULL;
+ vsplit->base.finish = vsplit_finish;
+ vsplit->base.destroy = vsplit_destroy;
+ vsplit->draw = draw;
+
+ for (i = 0; i < SEGMENT_SIZE; i++)
+ vsplit->identity_draw_elts[i] = i;
+
+ return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 00000000000..3f66f962e11
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,309 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
+
+#ifdef ELT_TYPE
+
+/**
+ * Fetch all elements in [min_index, max_index] with bias, and use the
+ * (rebased) index buffer as the draw elements.
+ */
+static boolean
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned istart, unsigned icount)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const unsigned min_index = draw->pt.user.min_index;
+ const unsigned max_index = draw->pt.user.max_index;
+ const int elt_bias = draw->pt.user.eltBias;
+ unsigned fetch_start, fetch_count;
+ const ushort *draw_elts = NULL;
+ unsigned i;
+
+ /* use the ib directly */
+ if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+ if (icount > vsplit->max_vertices)
+ return FALSE;
+
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+ assert(idx >= min_index && idx <= max_index);
+ }
+ draw_elts = (const ushort *) ib;
+ }
+ else {
+ /* have to go through vsplit->draw_elts */
+ if (icount > vsplit->segment_size)
+ return FALSE;
+ }
+
+ /* this is faster only when we fetch less elements than the normal path */
+ if (max_index - min_index > icount - 1)
+ return FALSE;
+
+ if (elt_bias < 0 && min_index < -elt_bias)
+ return FALSE;
+
+ /* why this check? */
+ for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+ if (draw->pt.vertex_element[i].instance_divisor)
+ return FALSE;
+ }
+
+ fetch_start = min_index + elt_bias;
+ fetch_count = max_index - min_index + 1;
+
+ if (!draw_elts) {
+ if (min_index == 0) {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) idx;
+ }
+ }
+ else {
+ for (i = 0; i < icount; i++) {
+ ELT_TYPE idx = ib[istart + i];
+
+ assert(idx >= min_index && idx <= max_index);
+ vsplit->draw_elts[i] = (ushort) (idx - min_index);
+ }
+ }
+
+ draw_elts = vsplit->draw_elts;
+ }
+
+ return vsplit->middle->run_linear_elts(vsplit->middle,
+ fetch_start, fetch_count,
+ draw_elts, icount, 0x0);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart; When close is TRUE, iclose is
+ * appended.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart, unsigned icount,
+ boolean spoken, unsigned ispoken,
+ boolean close, unsigned iclose)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const int ibias = draw->pt.user.eltBias;
+ unsigned i;
+
+ assert(icount + !!close <= vsplit->segment_size);
+
+ vsplit_clear_cache(vsplit);
+
+ spoken = !!spoken;
+ if (ibias == 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, ib[ispoken]);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, ib[istart + i]);
+
+ if (close)
+ ADD_CACHE(vsplit, ib[iclose]);
+ }
+ else if (ibias > 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+ if (close)
+ ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+ }
+ else {
+ if (spoken) {
+ if (ib[ispoken] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[ispoken] + ibias);
+ }
+
+ for (i = spoken; i < icount; i++) {
+ if (ib[istart + i] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[istart + i] + ibias);
+ }
+
+ if (close) {
+ if (ib[iclose] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[iclose] + ibias);
+ }
+ }
+
+ vsplit_flush_cache(vsplit, flags);
+}
+
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount)
+{
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{
+ const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0);
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->segment_size; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) \
+ CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
+
+#else /* ELT_TYPE */
+
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount)
+{
+ assert(icount <= vsplit->max_vertices);
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean close_loop = (flags == DRAW_SPLIT_BEFORE);
+ unsigned nr;
+
+ assert(icount + !!close_loop <= vsplit->segment_size);
+
+ if (close_loop) {
+ for (nr = 0; nr < icount; nr++)
+ vsplit->fetch_elts[nr] = istart + nr;
+ vsplit->fetch_elts[nr++] = i0;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{
+ boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+ unsigned nr = 0, i;
+
+ assert(icount + !!use_spoken <= vsplit->segment_size);
+
+ if (use_spoken) {
+ vsplit->fetch_elts[nr++] = i0;
+ for (i = 1 ; i < icount; i++)
+ vsplit->fetch_elts[nr++] = istart + i;
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags);
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->max_vertices; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; \
+ const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) FALSE
+
+#define ELT_TYPE linear
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS \
+ struct draw_pt_front_end *frontend, \
+ unsigned start, \
+ unsigned count
+
+#define SEGMENT_SIMPLE(flags, istart, icount) \
+ CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h"
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
new file mode 100644
index 00000000000..7fafde9d5e6
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -0,0 +1,31 @@
+#define FUNC_VARS \
+ struct pt_so_emit *so, \
+ const struct draw_prim_info *input_prims, \
+ const struct draw_vertex_info *input_verts, \
+ unsigned start, \
+ unsigned count
+
+#define FUNC_ENTER \
+ /* declare more local vars */ \
+ const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
+ const boolean last_vertex_last = TRUE; \
+ do { \
+ debug_assert(input_prims->primitive_count == 1); \
+ switch (prim) { \
+ case PIPE_PRIM_LINES_ADJACENCY: \
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY: \
+ case PIPE_PRIM_TRIANGLES_ADJACENCY: \
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: \
+ debug_assert(!"unexpected primitive type in stream output"); \
+ return; \
+ default: \
+ break; \
+ } \
+ } while (0) \
+
+#define POINT(i0) so_point(so,i0)
+#define LINE(flags,i0,i1) so_line(so,i0,i1)
+#define TRIANGLE(flags,i0,i1,i2) so_tri(so,i0,i1,i2)
+
+#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 00000000000..47defc62b96
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned first, incr;
+ LOCAL_VARS
+
+ /*
+ * prim, start, count, and max_count_{simple,loop,fan} should have been
+ * defined
+ */
+ if (0) {
+ debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+ "max_count_loop %d, max_count_fan %d\n",
+ __FUNCTION__, prim, start, count, max_count_simple,
+ max_count_loop, max_count_fan);
+ }
+
+ draw_pt_split_prim(prim, &first, &incr);
+ /* sanitize primitive length */
+ count = draw_pt_trim_count(count, first, incr);
+ if (count < first)
+ return;
+
+ /* try flushing the entire primitive */
+ if (PRIMITIVE(start, count))
+ return;
+
+ /* must be able to at least flush two complete primitives */
+ assert(max_count_simple >= first + incr &&
+ max_count_loop >= first + incr &&
+ max_count_fan >= first + incr);
+
+ /* no splitting required */
+ if (count <= max_count_simple) {
+ SEGMENT_SIMPLE(0x0, start, count);
+ }
+ else {
+ const unsigned rollback = first - incr;
+ unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+ /*
+ * Both count and seg_max below are explicitly trimmed. Because
+ *
+ * seg_start = N * (seg_max - rollback) = N' * incr,
+ *
+ * we have
+ *
+ * remaining = count - seg_start = first + N'' * incr.
+ *
+ * That is, remaining is implicitly trimmed.
+ */
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+ if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+ prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+ /* make sure we flush even number of triangles at a time */
+ if (seg_max < count && !(((seg_max - first) / incr) & 1))
+ seg_max -= incr;
+ }
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_FAN(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef PRIMITIVE
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index cccd3bf4358..e32803c0720 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -98,14 +98,14 @@ struct vbuf_render {
boolean (*set_primitive)( struct vbuf_render *, unsigned prim );
/**
- * DrawElements, note indices are ushort. The driver must complete
- * this call, if necessary splitting the index list itself.
+ * Draw indexed primitives. Note that indices are ushort. The driver
+ * must complete this call, if necessary splitting the index list itself.
*/
- void (*draw)( struct vbuf_render *,
- const ushort *indices,
- uint nr_indices );
+ void (*draw_elements)( struct vbuf_render *,
+ const ushort *indices,
+ uint nr_indices );
- /* Draw Arrays path too.
+ /* Draw non-indexed primitives.
*/
void (*draw_arrays)( struct vbuf_render *,
unsigned start,
@@ -117,6 +117,14 @@ struct vbuf_render {
void (*release_vertices)( struct vbuf_render * );
void (*destroy)( struct vbuf_render * );
+
+
+ /**
+ * Called after writing data to the stream out buffers
+ */
+ void (*set_stream_output_info)( struct vbuf_render *vbufr,
+ unsigned primitive_count,
+ unsigned vertices_count );
};
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 3af31ffe126..e63cf5f4f98 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -166,7 +166,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit
}
}
-static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit)
+static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit)
{
switch (emit) {
case EMIT_OMIT:
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index c2832eefa2a..fb665b08fff 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -48,18 +48,30 @@
DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE)
+
+/**
+ * Set a vertex shader constant buffer.
+ * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1]
+ * \param constants the mapped buffer
+ * \param size size of buffer in bytes
+ */
void
draw_vs_set_constants(struct draw_context *draw,
unsigned slot,
const void *constants,
unsigned size)
{
- if (((uintptr_t)constants) & 0xf) {
+ const int alignment = 16;
+
+ /* check if buffer is 16-byte aligned */
+ if (((uintptr_t)constants) & (alignment - 1)) {
+ /* if not, copy the constants into a new, 16-byte aligned buffer */
if (size > draw->vs.const_storage_size[slot]) {
if (draw->vs.aligned_constant_storage[slot]) {
align_free((void *)draw->vs.aligned_constant_storage[slot]);
}
- draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16);
+ draw->vs.aligned_constant_storage[slot] =
+ align_malloc(size, alignment);
}
assert(constants);
memcpy((void *)draw->vs.aligned_constant_storage[slot],
@@ -85,18 +97,27 @@ struct draw_vertex_shader *
draw_create_vertex_shader(struct draw_context *draw,
const struct pipe_shader_state *shader)
{
- struct draw_vertex_shader *vs;
+ struct draw_vertex_shader *vs = NULL;
if (draw->dump_vs) {
tgsi_dump(shader->tokens, 0);
}
- vs = draw_create_vs_sse( draw, shader );
- if (!vs) {
+ if (!draw->pt.middle.llvm) {
+#if defined(PIPE_ARCH_X86)
+ vs = draw_create_vs_sse( draw, shader );
+#elif defined(PIPE_ARCH_PPC)
vs = draw_create_vs_ppc( draw, shader );
- if (!vs) {
- vs = draw_create_vs_exec( draw, shader );
- }
+#endif
+ }
+#if HAVE_LLVM
+ else {
+ vs = draw_create_vs_llvm(draw, shader);
+ }
+#endif
+
+ if (!vs) {
+ vs = draw_create_vs_exec( draw, shader );
}
if (vs)
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 6c7e94db433..f9a038788fb 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -133,7 +133,8 @@ struct draw_vertex_shader {
void (*run_linear)( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride );
@@ -165,7 +166,6 @@ draw_create_vs_ppc(struct draw_context *draw,
const struct pipe_shader_state *templ);
-
struct draw_vs_varient_key;
struct draw_vertex_shader;
@@ -173,6 +173,11 @@ struct draw_vs_varient *
draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key );
+#if HAVE_LLVM
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *state);
+#endif
/********************************************************************************
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 1911242f825..68e8295b5e1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -32,6 +32,8 @@
#define DRAW_VS_AOS_H
#include "pipe/p_config.h"
+#include "tgsi/tgsi_exec.h"
+#include "draw_vs.h"
#ifdef PIPE_ARCH_X86
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index bc34d390dae..dab3eb1ca8e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -85,7 +85,8 @@ static void
vs_exec_run_linear( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
@@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
unsigned int i, j;
unsigned slot;
- for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- machine->Consts[i] = constants[i];
- }
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, const_size);
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
new file mode 100644
index 00000000000..fa9992db783
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -0,0 +1,127 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_screen.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+#include "draw_llvm.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+static void
+vs_llvm_prepare(struct draw_vertex_shader *shader,
+ struct draw_context *draw)
+{
+ /*struct llvm_vertex_shader *evs = llvm_vertex_shader(shader);*/
+}
+
+static void
+vs_llvm_run_linear( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ /* we should never get here since the entire pipeline is
+ * generated in draw_pt_fetch_shade_pipeline_llvm.c */
+ debug_assert(0);
+}
+
+
+static void
+vs_llvm_delete( struct draw_vertex_shader *dvs )
+{
+ struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs);
+ struct pipe_fence_handle *fence = NULL;
+ struct draw_llvm_variant_list_item *li;
+ struct pipe_context *pipe = dvs->draw->pipe;
+
+ /*
+ * XXX: This might be not neccessary at all.
+ */
+ pipe->flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+
+
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ struct draw_llvm_variant_list_item *next = next_elem(li);
+ draw_llvm_destroy_variant(li->base);
+ li = next;
+ }
+
+ assert(shader->variants_cached == 0);
+ FREE((void*) dvs->state.tokens);
+ FREE( dvs );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *state)
+{
+ struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader );
+
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+
+ vs->variant_key_size =
+ draw_llvm_variant_key_size(
+ vs->base.info.file_max[TGSI_FILE_INPUT]+1,
+ vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
+
+ vs->base.draw = draw;
+ vs->base.prepare = vs_llvm_prepare;
+ vs->base.run_linear = vs_llvm_run_linear;
+ vs->base.delete = vs_llvm_delete;
+ vs->base.create_varient = draw_vs_create_varient_generic;
+
+ make_empty_list(&vs->variants);
+
+ return &vs->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index d869eecec5e..5df84916c51 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
*/
shader->func(inputs_soa, outputs_soa, temps_soa,
(float (*)[4]) shader->base.immediates,
- (const float (*)[4])constants[0],
+ (float (*)[4])constants[0],
ppc_builtin_constants);
/* convert (up to) four output verts from SoA back to AoS format */
@@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.create_varient = draw_vs_varient_aos_ppc;
else
#endif
- vs->base.create_varient = draw_vs_varient_generic;
+ vs->base.create_varient = draw_vs_create_varient_generic;
vs->base.prepare = vs_ppc_prepare;
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 14c95082a9d..0b0c6077c6f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -84,6 +84,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
const float (*input)[4],
float (*output)[4],
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 6eb26927f27..eacd1601877 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants_size,
count,
temp_vertex_stride,
temp_vertex_stride);
@@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.vs->run_linear( vsvg->base.vs,
temp_buffer,
temp_buffer,
- vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants,
+ vsvg->base.vs->draw->pt.user.vs_constants_size,
count,
temp_vertex_stride,
temp_vertex_stride);