summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c 20
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 47
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 39
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs.c 26
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs.h 20
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs_emit.c 93
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_upload.c 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c 85
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_gs_state.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_sol.c 71
15 files changed, 417 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index cd6a8f48b5a..e50f9c3f95f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -93,6 +93,7 @@ i965_C_SOURCES := \
gen6_sampler_state.c \
gen6_scissor_state.c \
gen6_sf_state.c \
+ gen6_sol.c \
gen6_urb.c \
gen6_viewport_state.c \
gen6_vs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 5e9cb1f8b2f..d8cad54667a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -178,6 +178,26 @@ brwCreateContext(int api,
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ /* Hardware only supports a limited number of transform feedback buffers.
+ * So we need to override the Mesa default (which is based only on software
+ * limits).
+ */
+ ctx->Const.MaxTransformFeedbackSeparateAttribs = BRW_MAX_SOL_BUFFERS;
+
+ /* On Gen6, in the worst case, we use up one binding table entry per
+ * transform feedback component (see comments above the definition of
+ * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
+ * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
+ * BRW_MAX_SOL_BINDINGS.
+ *
+ * In "separate attribs" mode, we need to divide this value by
+ * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
+ * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
+ */
+ ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
+ ctx->Const.MaxTransformFeedbackSeparateComponents =
+ BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
+
/* if conformance mode is set, swrast can handle any size AA point */
ctx->Const.MaxPointSizeAA = 255.0;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 70a45c77260..febd4fe4365 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -368,6 +368,12 @@ struct brw_clip_prog_data {
struct brw_gs_prog_data {
GLuint urb_read_length;
GLuint total_grf;
+
+ /**
+ * Gen6 transform feedback: Amount by which the streaming vertex buffer
+ * indices should be incremented each time the GS is invoked.
+ */
+ unsigned svbi_postincrement_value;
};
struct brw_vs_prog_data {
@@ -407,6 +413,34 @@ struct brw_vs_ouput_sizes {
#define BRW_MAX_DRAW_BUFFERS 8
/**
+ * Max number of binding table entries used for stream output.
+ *
+ * From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the
+ * minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64.
+ *
+ * On Gen6, the size of transform feedback data is limited not by the number
+ * of components but by the number of binding table entries we set aside. We
+ * use one binding table entry for a float, one entry for a vector, and one
+ * entry per matrix column. Since the only way we can communicate our
+ * transform feedback capabilities to the client is via
+ * MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS, we need to plan for the
+ * worst case, in which all the varyings are floats, so we use up one binding
+ * table entry per component. Therefore we need to set aside at least 64
+ * binding table entries for use by transform feedback.
+ *
+ * Note: since we don't currently pack varyings, it is currently impossible
+ * for the client to actually use up all of these binding table entries--if
+ * all of their varyings were floats, they would run out of varying slots and
+ * fail to link. But that's a bug, so it seems prudent to go ahead and
+ * allocate the number of binding table entries we will need once the bug is
+ * fixed.
+ */
+#define BRW_MAX_SOL_BINDINGS 64
+
+/** Maximum number of actual buffers used for stream output */
+#define BRW_MAX_SOL_BUFFERS 4
+
+/**
* Helpers to create Surface Binding Table indexes for draw buffers,
* textures, and constant buffers.
*
@@ -436,6 +470,11 @@ struct brw_vs_ouput_sizes {
* | . | . |
* | : | : |
* | 25 | Texture 15 |
+ * +-----|-------------------------+
+ * | 26 | SOL Binding 0 |
+ * | . | . |
+ * | : | : |
+ * | 89 | SOL Binding 63 |
* +-------------------------------+
*
* Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
@@ -446,9 +485,10 @@ struct brw_vs_ouput_sizes {
#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
+#define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
/** Maximum size of the binding table. */
-#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
+#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
enum brw_cache_id {
BRW_BLEND_STATE,
@@ -1026,6 +1066,11 @@ brw_compute_barycentric_interp_modes(bool shade_model_flat,
/* brw_wm_surface_state.c */
void brw_init_surface_formats(struct brw_context *brw);
+void
+brw_update_sol_surface(struct brw_context *brw,
+ struct gl_buffer_object *buffer_obj,
+ uint32_t *out_offset, unsigned num_vector_components,
+ unsigned stride_dwords, unsigned offset_dwords);
/* gen6_clip_state.c */
bool
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 18546023531..4edfaf7d5e4 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1424,6 +1424,12 @@ enum brw_wm_barycentric_interp_mode {
#define URB_WRITE_PRIM_START 0x2
#define URB_WRITE_PRIM_TYPE_SHIFT 2
+
+/* Maximum number of entries that can be addressed using a binding table
+ * pointer of type SURFTYPE_BUFFER
+ */
+#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
+
#include "intel_chipset.h"
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 596be02158c..1529ec622a7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -912,6 +912,13 @@ void brw_ff_sync(struct brw_compile *p,
GLuint response_length,
bool eot);
+void brw_svb_write(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ bool send_commit_msg);
+
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index d48753c546f..f6726fcfca5 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2390,3 +2390,42 @@ void brw_ff_sync(struct brw_compile *p,
response_length,
eot);
}
+
+/**
+ * Emit the SEND instruction necessary to generate stream output data on Gen6
+ * (for transform feedback).
+ *
+ * If send_commit_msg is true, this is the last piece of stream output data
+ * from this thread, so send the data as a committed write. According to the
+ * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
+ *
+ * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
+ * writes are complete by sending the final write as a committed write."
+ */
+void
+brw_svb_write(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ bool send_commit_msg)
+{
+ struct brw_instruction *insn;
+
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, brw_imm_d(0));
+ brw_set_dp_write_message(p, insn,
+ binding_table_index,
+ 0, /* msg_control: ignored */
+ GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
+ 1, /* msg_length */
+ true, /* header_present */
+ 0, /* last_render_target: ignored */
+ send_commit_msg, /* response_length */
+ 0, /* end_of_thread */
+ send_commit_msg); /* send_commit_msg */
+}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index f5d5898e04b..1e605efd6e4 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -183,7 +183,31 @@ static void populate_key( struct brw_context *brw,
} else if (intel->gen == 6) {
/* On Gen6, GS is used for transform feedback. */
/* _NEW_TRANSFORM_FEEDBACK */
- key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active;
+ if (ctx->TransformFeedback.CurrentObject->Active) {
+ const struct gl_shader_program *shaderprog =
+ ctx->Shader.CurrentVertexProgram;
+ const struct gl_transform_feedback_info *linked_xfb_info =
+ &shaderprog->LinkedTransformFeedback;
+ int i;
+
+ /* Make sure that the VUE slots won't overflow the unsigned chars in
+ * key->transform_feedback_bindings[].
+ */
+ STATIC_ASSERT(BRW_VERT_RESULT_MAX <= 256);
+
+ /* Make sure that we don't need more binding table entries than we've
+ * set aside for use in transform feedback. (We shouldn't, since we
+ * set aside enough binding table entries to have one per component).
+ */
+ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+ key->need_gs_prog = true;
+ key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+ for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
+ key->transform_feedback_bindings[i] =
+ linked_xfb_info->Outputs[i].OutputRegister;
+ }
+ }
} else {
/* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
* into simpler primitives.
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
index ecab3ef37fa..33d8d7ab5a7 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -50,6 +50,18 @@ struct brw_gs_prog_key {
GLuint pv_first:1;
GLuint need_gs_prog:1;
GLuint userclip_active:1;
+
+ /**
+ * Number of varyings that are output to transform feedback.
+ */
+ GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
+
+ /**
+ * Map from the index of a transform feedback binding table entry to the
+ * gl_vert_result that should be streamed out through that binding table
+ * entry.
+ */
+ unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
};
struct brw_gs_compile {
@@ -59,6 +71,14 @@ struct brw_gs_compile {
struct {
struct brw_reg R0;
+
+ /**
+ * Register holding streamed vertex buffer pointers -- see the Sandy
+ * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload
+ * [DevSNB]). These pointers are delivered in GRF 1.
+ */
+ struct brw_reg SVBI;
+
struct brw_reg vertex[MAX_GS_VERTS];
struct brw_reg header;
struct brw_reg temp;
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 322f9bd81c1..3062c3312b2 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -42,8 +42,16 @@
#include "brw_eu.h"
#include "brw_gs.h"
+/**
+ * Allocate registers for GS.
+ *
+ * If svbi_payload_enable is true, then the thread will be spawned with the
+ * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to hold the
+ * streamed vertex buffer indices.
+ */
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
- GLuint nr_verts )
+ GLuint nr_verts,
+ bool svbi_payload_enable )
{
GLuint i = 0,j;
@@ -51,6 +59,10 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c,
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+ /* Streamed vertex buffer indices */
+ if (svbi_payload_enable)
+ c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
+
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < nr_verts; j++) {
@@ -212,7 +224,7 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
struct intel_context *intel = &c->func.brw->intel;
- brw_gs_alloc_regs(c, 4);
+ brw_gs_alloc_regs(c, 4, false);
brw_gs_initialize_header(c);
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
* is the PV for quads, but vertex 0 for polygons:
@@ -250,7 +262,7 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
struct intel_context *intel = &c->func.brw->intel;
- brw_gs_alloc_regs(c, 4);
+ brw_gs_alloc_regs(c, 4, false);
brw_gs_initialize_header(c);
if (intel->needs_ff_sync)
@@ -286,7 +298,7 @@ void brw_gs_lines( struct brw_gs_compile *c )
{
struct intel_context *intel = &c->func.brw->intel;
- brw_gs_alloc_regs(c, 2);
+ brw_gs_alloc_regs(c, 2, false);
brw_gs_initialize_header(c);
if (intel->needs_ff_sync)
@@ -310,10 +322,81 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
unsigned num_verts, bool check_edge_flags)
{
struct brw_compile *p = &c->func;
+ c->prog_data.svbi_postincrement_value = num_verts;
- brw_gs_alloc_regs(c, num_verts);
+ brw_gs_alloc_regs(c, num_verts, true);
brw_gs_initialize_header(c);
+ if (key->num_transform_feedback_bindings > 0) {
+ unsigned vertex, binding;
+ /* Note: since we use the binding table to keep track of buffer offsets
+ * and stride, the GS doesn't need to keep track of a separate pointer
+ * into each buffer; it uses a single pointer which increments by 1 for
+ * each vertex. So we use SVBI0 for this pointer, regardless of whether
+ * transform feedback is in interleaved or separate attribs mode.
+ */
+ brw_MOV(p, get_element_ud(c->reg.header, 5),
+ get_element_ud(c->reg.SVBI, 0));
+ /* For each vertex, generate code to output each varying using the
+ * appropriate binding table entry.
+ */
+ for (vertex = 0; vertex < num_verts; ++vertex) {
+ for (binding = 0; binding < key->num_transform_feedback_bindings;
+ ++binding) {
+ unsigned char vert_result =
+ key->transform_feedback_bindings[binding];
+ unsigned char slot = c->vue_map.vert_result_to_slot[vert_result];
+ /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
+ *
+ * "Prior to End of Thread with a URB_WRITE, the kernel must
+ * ensure that all writes are complete by sending the final
+ * write as a committed write."
+ */
+ bool final_write =
+ binding == key->num_transform_feedback_bindings - 1 &&
+ vertex == num_verts - 1;
+ struct brw_reg vertex_slot = c->reg.vertex[vertex];
+ vertex_slot.nr += slot / 2;
+ vertex_slot.subnr = (slot % 2) * 16;
+ brw_MOV(p, stride(c->reg.header, 4, 4, 1),
+ retype(vertex_slot, BRW_REGISTER_TYPE_UD));
+ brw_svb_write(p,
+ final_write ? c->reg.temp : brw_null_reg(), /* dest */
+ 1, /* msg_reg_nr */
+ c->reg.header, /* src0 */
+ SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
+ final_write); /* send_commit_msg */
+ }
+
+ /* If there are more vertices to output, increment the pointer so
+ * that we will start outputting to the next location in the
+ * transform feedback buffers.
+ */
+ if (vertex != num_verts - 1) {
+ brw_ADD(p, get_element_ud(c->reg.header, 5),
+ get_element_ud(c->reg.header, 5), brw_imm_ud(1));
+ }
+ }
+
+ /* Now, reinitialize the header register from R0 to restore the parts of
+ * the register that we overwrote while streaming out transform feedback
+ * data.
+ */
+ brw_gs_initialize_header(c);
+
+ /* Finally, wait for the write commit to occur so that we can proceed to
+ * other things safely.
+ *
+ * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
+ *
+ * The write commit does not modify the destination register, but
+ * merely clears the dependency associated with the destination
+ * register. Thus, a simple “mov” instruction using the register as a
+ * source is sufficient to wait for the write commit to occur.
+ */
+ brw_MOV(p, c->reg.temp, c->reg.temp);
+ }
+
brw_gs_ff_sync(c, 1);
brw_gs_overwrite_header_dw2_from_r0(c);
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index e76901a3136..7b1398134bf 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->bind.bo_offset); /* vs */
- OUT_BATCH(0); /* gs */
+ OUT_BATCH(brw->bind.bo_offset); /* gs */
OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 59fe81aec26..a3a470fee6b 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -92,6 +92,7 @@ extern const struct brw_tracked_state gen6_gs_state;
extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
+extern const struct brw_tracked_state gen6_sol_surface;
extern const struct brw_tracked_state gen6_sf_state;
extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index bd32815d08c..463689224df 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -145,6 +145,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_wm_pull_constants,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
+ &gen6_sol_surface,
&brw_binding_table,
&brw_samplers,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 7a959522a09..3801c096dda 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -38,6 +38,7 @@
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
+#include "intel_buffer_objects.h"
#include "brw_context.h"
#include "brw_state.h"
@@ -715,6 +716,90 @@ brw_create_constant_surface(struct brw_context *brw,
I915_GEM_DOMAIN_SAMPLER, 0);
}
+/**
+ * Set up a binding table entry for use by stream output logic (transform
+ * feedback).
+ *
+ * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
+ */
+void
+brw_update_sol_surface(struct brw_context *brw,
+ struct gl_buffer_object *buffer_obj,
+ uint32_t *out_offset, unsigned num_vector_components,
+ unsigned stride_dwords, unsigned offset_dwords)
+{
+ drm_intel_bo *bo = intel_buffer_object(buffer_obj)->buffer;
+ uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
+ out_offset);
+ uint32_t pitch_minus_1 = 4*stride_dwords - 1;
+ uint32_t offset_bytes = 4 * offset_dwords;
+ size_t size_dwords = buffer_obj->Size / 4;
+ uint32_t buffer_size_minus_1, width, height, depth, surface_format;
+
+ /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
+ * too big to map using a single binding table entry?
+ */
+ assert((size_dwords - offset_dwords) / stride_dwords
+ <= BRW_MAX_NUM_BUFFER_ENTRIES);
+
+ if (size_dwords > offset_dwords + num_vector_components) {
+ /* There is room for at least 1 transform feedback output in the buffer.
+ * Compute the number of additional transform feedback outputs the
+ * buffer has room for.
+ */
+ buffer_size_minus_1 =
+ (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
+ } else {
+ /* There isn't even room for a single transform feedback output in the
+ * buffer. We can't configure the binding table entry to prevent output
+ * entirely; we'll have to rely on the geometry shader to detect
+ * overflow. But to minimize the damage in case of a bug, set up the
+ * binding table entry to just allow a single output.
+ */
+ buffer_size_minus_1 = 0;
+ }
+ width = buffer_size_minus_1 & 0x7f;
+ height = (buffer_size_minus_1 & 0xfff80) >> 7;
+ depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
+
+ switch (num_vector_components) {
+ case 1:
+ surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
+ break;
+ case 2:
+ surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
+ break;
+ case 3:
+ surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ break;
+ case 4:
+ surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ break;
+ default:
+ assert(!"Invalid vector size for transform feedback output");
+ surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
+ break;
+ }
+
+ surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
+ BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
+ surface_format << BRW_SURFACE_FORMAT_SHIFT |
+ BRW_SURFACE_RC_READ_WRITE;
+ surf[1] = bo->offset + offset_bytes; /* reloc */
+ surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
+ height << BRW_SURFACE_HEIGHT_SHIFT);
+ surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
+ pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
+ surf[4] = 0;
+ surf[5] = 0;
+
+ /* Emit relocation to surface contents. */
+ drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+ *out_offset + 4,
+ bo, offset_bytes,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+}
+
/* Creates a new WM constant buffer reflecting the current fragment program's
* constants, if needed by the fragment program.
*
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index 42962a64d36..fdad5d42dcd 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -50,13 +50,17 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(brw->gs.prog_offset);
OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
OUT_BATCH(0); /* no scratch space */
- OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+ OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
(brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_SO_STATISTICS_ENABLE |
GEN6_GS_RENDERING_ENABLE);
- OUT_BATCH(GEN6_GS_ENABLE);
+ OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
+ GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
+ (brw->gs.prog_data->svbi_postincrement_value <<
+ GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
+ GEN6_GS_ENABLE);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(7);
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
new file mode 100644
index 00000000000..491b39cce12
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** \file gen6_sol.c
+ *
+ * Code to initialize the binding table entries used by transform feedback.
+ */
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+static void
+gen6_update_sol_surfaces(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->intel.ctx;
+ /* _NEW_TRANSFORM_FEEDBACK */
+ struct gl_transform_feedback_object *xfb_obj =
+ ctx->TransformFeedback.CurrentObject;
+ /* BRW_NEW_VERTEX_PROGRAM */
+ const struct gl_shader_program *shaderprog =
+ ctx->Shader.CurrentVertexProgram;
+ const struct gl_transform_feedback_info *linked_xfb_info =
+ &shaderprog->LinkedTransformFeedback;
+ int i;
+
+ for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
+ const int surf_index = SURF_INDEX_SOL_BINDING(i);
+ if (xfb_obj->Active && i < linked_xfb_info->NumOutputs) {
+ unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
+ unsigned buffer_offset =
+ xfb_obj->Offset[buffer] / 4 +
+ linked_xfb_info->Outputs[i].DstOffset;
+ brw_update_sol_surface(
+ brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index],
+ linked_xfb_info->Outputs[i].NumComponents,
+ linked_xfb_info->BufferStride[buffer], buffer_offset);
+ } else {
+ brw->bind.surf_offset[surf_index] = 0;
+ }
+ }
+}
+
+const struct brw_tracked_state gen6_sol_surface = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM_FEEDBACK,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0
+ },
+ .emit = gen6_update_sol_surfaces,
+};