diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs.c | 33 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_gs_state.c | 41 |
7 files changed, 84 insertions, 15 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 57f086bdb5c..c566bba1ac3 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -548,6 +548,20 @@ struct brw_gs_prog_data unsigned output_vertex_size_hwords; unsigned output_topology; + + /** + * Size of the control data (cut bits or StreamID bits), in hwords (32 + * bytes). 0 if there is no control data. + */ + unsigned control_data_header_size_hwords; + + /** + * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID + * if the control data is StreamID bits, or + * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). + * Ignored if control_data_header_size_hwords is 0. + */ + unsigned control_data_format; }; /** Number of texture sampler units */ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0406c4d75e8..85e414d4f3a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1337,6 +1337,10 @@ enum brw_message_target { /* DW5 */ # define GEN6_GS_MAX_THREADS_SHIFT 25 # define HSW_GS_MAX_THREADS_SHIFT 24 +# define IVB_GS_CONTROL_DATA_FORMAT_SHIFT 24 +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0 +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 +# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 # define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11) # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11) # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11) @@ -1346,6 +1350,7 @@ enum brw_message_target { # define GEN7_GS_INCLUDE_PRIMITIVE_ID (1 << 4) # define GEN7_GS_ENABLE (1 << 0) /* DW6 */ +# define HSW_GS_CONTROL_DATA_FORMAT_SHIFT 31 # define GEN6_GS_REORDER (1 << 30) # define GEN6_GS_DISCARD_ADJACENCY (1 << 29) # define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c index 7ab03acfaa9..f67ae2baada 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4_gs.c +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c @@ -62,6 +62,38 @@ do_gs_prog(struct brw_context *brw, c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count); c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count); + if (gp->program.OutputType == GL_POINTS) { + /* When the output type is points, the geometry shader may output data + * to multiple streams, and EndPrimitive() has no effect. So we + * configure the hardware to interpret the control data as stream ID. + */ + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; + + /* However, StreamID is not yet supported, so we output zero bits of + * control data per vertex. + */ + c.control_data_bits_per_vertex = 0; + } else { + /* When the output type is triangle_strip or line_strip, EndPrimitive() + * may be used to terminate the current strip and start a new one + * (similar to primitive restart), and outputting data to multiple + * streams is not supported. So we configure the hardware to interpret + * the control data as EndPrimitive information (a.k.a. "cut bits"). + */ + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; + + /* We only need to output control data if the shader actually calls + * EndPrimitive(). + */ + c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 
1 : 0; + } + c.control_data_header_size_bits = + gp->program.VerticesOut * c.control_data_bits_per_vertex; + + /* 1 HWORD = 32 bytes = 256 bits */ + c.prog_data.control_data_header_size_hwords = + ALIGN(c.control_data_header_size_bits, 256) / 256; + brw_compute_vue_map(brw, &c.prog_data.base.vue_map, gp->program.Base.OutputsWritten, c.key.base.userclip_active); @@ -148,6 +180,7 @@ do_gs_prog(struct brw_context *brw, */ unsigned output_size_bytes = c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; + output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; assert(output_size_bytes >= 1); if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index d82a26ea07c..37cde6437cd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -200,6 +200,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) (void) complete; vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE); + inst->offset = c->prog_data.control_data_header_size_hwords; inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; return inst; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index fba0ac61205..1193e28715c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -51,6 +51,9 @@ struct brw_gs_compile struct brw_gs_prog_data prog_data; struct brw_geometry_program *gp; + + unsigned control_data_bits_per_vertex; + unsigned control_data_header_size_bits; }; #ifdef __cplusplus diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 86ecd214496..4760a5399af 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2727,7 +2727,7 @@ 
vec4_visitor::emit_vertex() * URB row increments, and each of our MRFs is half of one of * those, since we're doing interleaved writes. */ - inst->offset = (max_usable_mrf - base_mrf) / 2; + inst->offset += (max_usable_mrf - base_mrf) / 2; } } diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 3e3c33123a4..231e3c98910 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -95,21 +95,34 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(0); } - OUT_BATCH(((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) << - GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | - (brw->gs.prog_data->output_topology << - GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | - (prog_data->urb_read_length << - GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | - (prog_data->dispatch_grf_start_reg << - GEN6_GS_DISPATCH_START_GRF_SHIFT)); - - OUT_BATCH(((brw->max_gs_threads - 1) << max_threads_shift) | - GEN7_GS_DISPATCH_MODE_DUAL_OBJECT | - GEN6_GS_STATISTICS_ENABLE | - GEN7_GS_ENABLE); + uint32_t dw5 = + ((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) << + GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | + (brw->gs.prog_data->output_topology << + GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | + (prog_data->urb_read_length << + GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | + (prog_data->dispatch_grf_start_reg << + GEN6_GS_DISPATCH_START_GRF_SHIFT); + uint32_t dw6 = + ((brw->max_gs_threads - 1) << max_threads_shift) | + (brw->gs.prog_data->control_data_header_size_hwords << + GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | + GEN7_GS_DISPATCH_MODE_DUAL_OBJECT | + GEN6_GS_STATISTICS_ENABLE | + GEN7_GS_ENABLE; + + if (brw->is_haswell) { + dw6 |= brw->gs.prog_data->control_data_format << + HSW_GS_CONTROL_DATA_FORMAT_SHIFT; + } else { + dw5 |= brw->gs.prog_data->control_data_format << + IVB_GS_CONTROL_DATA_FORMAT_SHIFT; + } + OUT_BATCH(dw5); + OUT_BATCH(dw6); OUT_BATCH(0); 
ADVANCE_BATCH(); } else { |