diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 47 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 39 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs.c | 26 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs.h | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs_emit.c | 93 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 85 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_gs_state.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_sol.c | 71 |
15 files changed, 417 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index cd6a8f48b5a..e50f9c3f95f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -93,6 +93,7 @@ i965_C_SOURCES := \ gen6_sampler_state.c \ gen6_scissor_state.c \ gen6_sf_state.c \ + gen6_sol.c \ gen6_urb.c \ gen6_viewport_state.c \ gen6_vs_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 5e9cb1f8b2f..d8cad54667a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -178,6 +178,26 @@ brwCreateContext(int api, ctx->Const.MaxTextureMaxAnisotropy = 16.0; + /* Hardware only supports a limited number of transform feedback buffers. + * So we need to override the Mesa default (which is based only on software + * limits). + */ + ctx->Const.MaxTransformFeedbackSeparateAttribs = BRW_MAX_SOL_BUFFERS; + + /* On Gen6, in the worst case, we use up one binding table entry per + * transform feedback component (see comments above the definition of + * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value + * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to + * BRW_MAX_SOL_BINDINGS. + * + * In "separate components" mode, we need to divide this value by + * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries + * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS. 
+ */ + ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS; + ctx->Const.MaxTransformFeedbackSeparateComponents = + BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; + /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 70a45c77260..febd4fe4365 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -368,6 +368,12 @@ struct brw_clip_prog_data { struct brw_gs_prog_data { GLuint urb_read_length; GLuint total_grf; + + /** + * Gen6 transform feedback: Amount by which the streaming vertex buffer + * indices should be incremented each time the GS is invoked. + */ + unsigned svbi_postincrement_value; }; struct brw_vs_prog_data { @@ -407,6 +413,34 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_DRAW_BUFFERS 8 /** + * Max number of binding table entries used for stream output. + * + * From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the + * minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64. + * + * On Gen6, the size of transform feedback data is limited not by the number + * of components but by the number of binding table entries we set aside. We + * use one binding table entry for a float, one entry for a vector, and one + * entry per matrix column. Since the only way we can communicate our + * transform feedback capabilities to the client is via + * MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS, we need to plan for the + * worst case, in which all the varyings are floats, so we use up one binding + * table entry per component. Therefore we need to set aside at least 64 + * binding table entries for use by transform feedback. 
+ * + * Note: since we don't currently pack varyings, it is currently impossible + * for the client to actually use up all of these binding table entries--if + * all of their varyings were floats, they would run out of varying slots and + * fail to link. But that's a bug, so it seems prudent to go ahead and + * allocate the number of binding table entries we will need once the bug is + * fixed. + */ +#define BRW_MAX_SOL_BINDINGS 64 + +/** Maximum number of actual buffers used for stream output */ +#define BRW_MAX_SOL_BUFFERS 4 + +/** * Helpers to create Surface Binding Table indexes for draw buffers, * textures, and constant buffers. * @@ -436,6 +470,11 @@ struct brw_vs_ouput_sizes { * | . | . | * | : | : | * | 25 | Texture 15 | + * +-----|-------------------------+ + * | 26 | SOL Binding 0 | + * | . | . | + * | : | : | + * | 89 | SOL Binding 63 | * +-------------------------------+ * * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be @@ -446,9 +485,10 @@ struct brw_vs_ouput_sizes { #define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0) #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1) #define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t)) +#define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t)) /** Maximum size of the binding table. 
*/ -#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2) +#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS) enum brw_cache_id { BRW_BLEND_STATE, @@ -1026,6 +1066,11 @@ brw_compute_barycentric_interp_modes(bool shade_model_flat, /* brw_wm_surface_state.c */ void brw_init_surface_formats(struct brw_context *brw); +void +brw_update_sol_surface(struct brw_context *brw, + struct gl_buffer_object *buffer_obj, + uint32_t *out_offset, unsigned num_vector_components, + unsigned stride_dwords, unsigned offset_dwords); /* gen6_clip_state.c */ bool diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 18546023531..4edfaf7d5e4 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1424,6 +1424,12 @@ enum brw_wm_barycentric_interp_mode { #define URB_WRITE_PRIM_START 0x2 #define URB_WRITE_PRIM_TYPE_SHIFT 2 + +/* Maximum number of entries that can be addressed using a binding table + * pointer of type SURFTYPE_BUFFER + */ +#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) + #include "intel_chipset.h" #endif diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 596be02158c..1529ec622a7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -912,6 +912,13 @@ void brw_ff_sync(struct brw_compile *p, GLuint response_length, bool eot); +void brw_svb_write(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + bool send_commit_msg); + void brw_fb_WRITE(struct brw_compile *p, int dispatch_width, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index d48753c546f..f6726fcfca5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2390,3 +2390,42 @@ void brw_ff_sync(struct brw_compile *p, response_length, eot); } + 
+/** + * Emit the SEND instruction necessary to generate stream output data on Gen6 + * (for transform feedback). + * + * If send_commit_msg is true, this is the last piece of stream output data + * from this thread, so send the data as a committed write. According to the + * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): + * + * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all + * writes are complete by sending the final write as a committed write." + */ +void +brw_svb_write(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + bool send_commit_msg) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + brw_set_dp_write_message(p, insn, + binding_table_index, + 0, /* msg_control: ignored */ + GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, + 1, /* msg_length */ + true, /* header_present */ + 0, /* last_render_target: ignored */ + send_commit_msg, /* response_length */ + 0, /* end_of_thread */ + send_commit_msg); /* send_commit_msg */ +} diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index f5d5898e04b..1e605efd6e4 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -183,7 +183,31 @@ static void populate_key( struct brw_context *brw, } else if (intel->gen == 6) { /* On Gen6, GS is used for transform feedback. 
*/ /* _NEW_TRANSFORM_FEEDBACK */ - key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active; + if (ctx->TransformFeedback.CurrentObject->Active) { + const struct gl_shader_program *shaderprog = + ctx->Shader.CurrentVertexProgram; + const struct gl_transform_feedback_info *linked_xfb_info = + &shaderprog->LinkedTransformFeedback; + int i; + + /* Make sure that the VUE slots won't overflow the unsigned chars in + * key->transform_feedback_bindings[]. + */ + STATIC_ASSERT(BRW_VERT_RESULT_MAX <= 256); + + /* Make sure that we don't need more binding table entries than we've + * set aside for use in transform feedback. (We shouldn't, since we + * set aside enough binding table entries to have one per component). + */ + assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); + + key->need_gs_prog = true; + key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; + for (i = 0; i < key->num_transform_feedback_bindings; ++i) { + key->transform_feedback_bindings[i] = + linked_xfb_info->Outputs[i].OutputRegister; + } + } } else { /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP * into simpler primitives. diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index ecab3ef37fa..33d8d7ab5a7 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -50,6 +50,18 @@ struct brw_gs_prog_key { GLuint pv_first:1; GLuint need_gs_prog:1; GLuint userclip_active:1; + + /** + * Number of varyings that are output to transform feedback. + */ + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ + + /** + * Map from the index of a transform feedback binding table entry to the + * gl_vert_result that should be streamed out through that binding table + * entry. 
+ */ + unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; }; struct brw_gs_compile { @@ -59,6 +71,14 @@ struct brw_gs_compile { struct { struct brw_reg R0; + + /** + * Register holding streamed vertex buffer pointers -- see the Sandy + * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload + * [DevSNB]). These pointers are delivered in GRF 1. + */ + struct brw_reg SVBI; + struct brw_reg vertex[MAX_GS_VERTS]; struct brw_reg header; struct brw_reg temp; diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 322f9bd81c1..3062c3312b2 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -42,8 +42,16 @@ #include "brw_eu.h" #include "brw_gs.h" +/** + * Allocate registers for GS. + * + * If svbi_payload_enable is true, then the thread will be spawned with the + * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to hold the + * streamed vertex buffer indices. + */ static void brw_gs_alloc_regs( struct brw_gs_compile *c, - GLuint nr_verts ) + GLuint nr_verts, + bool svbi_payload_enable ) { GLuint i = 0,j; @@ -51,6 +59,10 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c, */ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + /* Streamed vertex buffer indices */ + if (svbi_payload_enable) + c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); + /* Payload vertices plus space for more generated vertices: */ for (j = 0; j < nr_verts; j++) { @@ -212,7 +224,7 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { struct intel_context *intel = &c->func.brw->intel; - brw_gs_alloc_regs(c, 4); + brw_gs_alloc_regs(c, 4, false); brw_gs_initialize_header(c); /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: @@ -250,7 +262,7 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { struct intel_context *intel = &c->func.brw->intel; - brw_gs_alloc_regs(c, 4); + brw_gs_alloc_regs(c, 4, false); brw_gs_initialize_header(c); if (intel->needs_ff_sync) @@ -286,7 +298,7 @@ void brw_gs_lines( struct brw_gs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; - brw_gs_alloc_regs(c, 2); + brw_gs_alloc_regs(c, 2, false); brw_gs_initialize_header(c); if (intel->needs_ff_sync) @@ -310,10 +322,81 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, unsigned num_verts, bool check_edge_flags) { struct brw_compile *p = &c->func; + c->prog_data.svbi_postincrement_value = num_verts; - brw_gs_alloc_regs(c, num_verts); + brw_gs_alloc_regs(c, num_verts, true); brw_gs_initialize_header(c); + if (key->num_transform_feedback_bindings > 0) { + unsigned vertex, binding; + /* Note: since we use the binding table to keep track of buffer offsets + * and stride, the GS doesn't need to keep track of a separate pointer + * into each buffer; it uses a single pointer which increments by 1 for + * each vertex. So we use SVBI0 for this pointer, regardless of whether + * transform feedback is in interleaved or separate attribs mode. + */ + brw_MOV(p, get_element_ud(c->reg.header, 5), + get_element_ud(c->reg.SVBI, 0)); + /* For each vertex, generate code to output each varying using the + * appropriate binding table entry. 
+ */ + for (vertex = 0; vertex < num_verts; ++vertex) { + for (binding = 0; binding < key->num_transform_feedback_bindings; + ++binding) { + unsigned char vert_result = + key->transform_feedback_bindings[binding]; + unsigned char slot = c->vue_map.vert_result_to_slot[vert_result]; + /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: + * + * "Prior to End of Thread with a URB_WRITE, the kernel must + * ensure that all writes are complete by sending the final + * write as a committed write." + */ + bool final_write = + binding == key->num_transform_feedback_bindings - 1 && + vertex == num_verts - 1; + struct brw_reg vertex_slot = c->reg.vertex[vertex]; + vertex_slot.nr += slot / 2; + vertex_slot.subnr = (slot % 2) * 16; + brw_MOV(p, stride(c->reg.header, 4, 4, 1), + retype(vertex_slot, BRW_REGISTER_TYPE_UD)); + brw_svb_write(p, + final_write ? c->reg.temp : brw_null_reg(), /* dest */ + 1, /* msg_reg_nr */ + c->reg.header, /* src0 */ + SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */ + final_write); /* send_commit_msg */ + } + + /* If there are more vertices to output, increment the pointer so + * that we will start outputting to the next location in the + * transform feedback buffers. + */ + if (vertex != num_verts - 1) { + brw_ADD(p, get_element_ud(c->reg.header, 5), + get_element_ud(c->reg.header, 5), brw_imm_ud(1)); + } + } + + /* Now, reinitialize the header register from R0 to restore the parts of + * the register that we overwrote while streaming out transform feedback + * data. + */ + brw_gs_initialize_header(c); + + /* Finally, wait for the write commit to occur so that we can proceed to + * other things safely. + * + * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: + * + * The write commit does not modify the destination register, but + * merely clears the dependency associated with the destination + * register. 
Thus, a simple “mov” instruction using the register as a + * source is sufficient to wait for the write commit to occur. + */ + brw_MOV(p, c->reg.temp, c->reg.temp); + } + brw_gs_ff_sync(c, 1); brw_gs_overwrite_header_dw2_from_r0(c); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index e76901a3136..7b1398134bf 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) GEN6_BINDING_TABLE_MODIFY_PS | (4 - 2)); OUT_BATCH(brw->bind.bo_offset); /* vs */ - OUT_BATCH(0); /* gs */ + OUT_BATCH(brw->bind.bo_offset); /* gs */ OUT_BATCH(brw->bind.bo_offset); /* wm/ps */ ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 59fe81aec26..a3a470fee6b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -92,6 +92,7 @@ extern const struct brw_tracked_state gen6_gs_state; extern const struct brw_tracked_state gen6_renderbuffer_surfaces; extern const struct brw_tracked_state gen6_sampler_state; extern const struct brw_tracked_state gen6_scissor_state; +extern const struct brw_tracked_state gen6_sol_surface; extern const struct brw_tracked_state gen6_sf_state; extern const struct brw_tracked_state gen6_sf_vp; extern const struct brw_tracked_state gen6_urb; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index bd32815d08c..463689224df 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -145,6 +145,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_wm_pull_constants, &gen6_renderbuffer_surfaces, &brw_texture_surfaces, + &gen6_sol_surface, &brw_binding_table, &brw_samplers, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 7a959522a09..3801c096dda 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -38,6 +38,7 @@ #include "intel_batchbuffer.h" #include "intel_tex.h" #include "intel_fbo.h" +#include "intel_buffer_objects.h" #include "brw_context.h" #include "brw_state.h" @@ -715,6 +716,90 @@ brw_create_constant_surface(struct brw_context *brw, I915_GEM_DOMAIN_SAMPLER, 0); } +/** + * Set up a binding table entry for use by stream output logic (transform + * feedback). + * + * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES. + */ +void +brw_update_sol_surface(struct brw_context *brw, + struct gl_buffer_object *buffer_obj, + uint32_t *out_offset, unsigned num_vector_components, + unsigned stride_dwords, unsigned offset_dwords) +{ + drm_intel_bo *bo = intel_buffer_object(buffer_obj)->buffer; + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, + out_offset); + uint32_t pitch_minus_1 = 4*stride_dwords - 1; + uint32_t offset_bytes = 4 * offset_dwords; + size_t size_dwords = buffer_obj->Size / 4; + uint32_t buffer_size_minus_1, width, height, depth, surface_format; + + /* FIXME: can we rely on core Mesa to ensure that the buffer isn't + * too big to map using a single binding table entry? + */ + assert((size_dwords - offset_dwords) / stride_dwords + <= BRW_MAX_NUM_BUFFER_ENTRIES); + + if (size_dwords > offset_dwords + num_vector_components) { + /* There is room for at least 1 transform feedback output in the buffer. + * Compute the number of additional transform feedback outputs the + * buffer has room for. + */ + buffer_size_minus_1 = + (size_dwords - offset_dwords - num_vector_components) / stride_dwords; + } else { + /* There isn't even room for a single transform feedback output in the + * buffer. 
We can't configure the binding table entry to prevent output + * entirely; we'll have to rely on the geometry shader to detect + * overflow. But to minimize the damage in case of a bug, set up the + * binding table entry to just allow a single output. + */ + buffer_size_minus_1 = 0; + } + width = buffer_size_minus_1 & 0x7f; + height = (buffer_size_minus_1 & 0xfff80) >> 7; + depth = (buffer_size_minus_1 & 0x7f00000) >> 20; + + switch (num_vector_components) { + case 1: + surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; + case 2: + surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT; + break; + case 3: + surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; + break; + case 4: + surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"Invalid vector size for transform feedback output"); + surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; + } + + surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT | + BRW_SURFACE_RC_READ_WRITE; + surf[1] = bo->offset + offset_bytes; /* reloc */ + surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | + height << BRW_SURFACE_HEIGHT_SHIFT); + surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); + surf[4] = 0; + surf[5] = 0; + + /* Emit relocation to surface contents. */ + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + *out_offset + 4, + bo, offset_bytes, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); +} + /* Creates a new WM constant buffer reflecting the current fragment program's * constants, if needed by the fragment program. 
* diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 42962a64d36..fdad5d42dcd 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -50,13 +50,17 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(brw->gs.prog_offset); OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE); OUT_BATCH(0); /* no scratch space */ - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT)); OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) | GEN6_GS_STATISTICS_ENABLE | GEN6_GS_SO_STATISTICS_ENABLE | GEN6_GS_RENDERING_ENABLE); - OUT_BATCH(GEN6_GS_ENABLE); + OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE | + GEN6_GS_SVBI_POSTINCREMENT_ENABLE | + (brw->gs.prog_data->svbi_postincrement_value << + GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) | + GEN6_GS_ENABLE); ADVANCE_BATCH(); } else { BEGIN_BATCH(7); diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c new file mode 100644 index 00000000000..491b39cce12 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -0,0 +1,71 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** \file gen6_sol.c + * + * Code to initialize the binding table entries used by transform feedback. + */ + +#include "brw_context.h" +#include "brw_defines.h" + +static void +gen6_update_sol_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + /* _NEW_TRANSFORM_FEEDBACK */ + struct gl_transform_feedback_object *xfb_obj = + ctx->TransformFeedback.CurrentObject; + /* BRW_NEW_VERTEX_PROGRAM */ + const struct gl_shader_program *shaderprog = + ctx->Shader.CurrentVertexProgram; + const struct gl_transform_feedback_info *linked_xfb_info = + &shaderprog->LinkedTransformFeedback; + int i; + + for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { + const int surf_index = SURF_INDEX_SOL_BINDING(i); + if (xfb_obj->Active && i < linked_xfb_info->NumOutputs) { + unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer; + unsigned buffer_offset = + xfb_obj->Offset[buffer] / 4 + + linked_xfb_info->Outputs[i].DstOffset; + brw_update_sol_surface( + brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index], + linked_xfb_info->Outputs[i].NumComponents, + linked_xfb_info->BufferStride[buffer], buffer_offset); + } else { + brw->bind.surf_offset[surf_index] = 0; + } + } +} + +const struct brw_tracked_state gen6_sol_surface = { + .dirty = { + .mesa = _NEW_TRANSFORM_FEEDBACK, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .emit = gen6_update_sol_surfaces, +}; |