summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIago Toral Quiroga <[email protected]>2014-07-01 13:08:25 +0200
committerIago Toral Quiroga <[email protected]>2014-09-19 15:01:15 +0200
commitc09ddf82ffa13173b55a1b51075be2671378c4ea (patch)
treeebaf69626cc5aa55e6e449cbd3bce2691379365a
parent621685ad4c747cc67e1b6c7ba95fa59774196a54 (diff)
i965/gen6/gs: Compute URB entry size for user-provided geometry shaders.
Acked-by: Kenneth Graunke <[email protected]> Reviewed-by: Jordan Justen <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs.c87
2 files changed, 62 insertions, 33 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0b5735cc02e..72a21e85d16 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1595,10 +1595,14 @@ enum brw_message_target {
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
-/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
+/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
* is 2^9, or 512. It's counted in multiples of 64 bytes.
*/
-#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
+/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
+ * (128 bytes) URB rows and the maximum allowed value is 5 rows.
+ */
+#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index b00e584f7a2..41c6d8b6593 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -75,31 +75,36 @@ do_gs_prog(struct brw_context *brw,
*/
c.prog_data.base.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers;
- if (gp->program.OutputType == GL_POINTS) {
- /* When the output type is points, the geometry shader may output data
- * to multiple streams, and EndPrimitive() has no effect. So we
- * configure the hardware to interpret the control data as stream ID.
- */
- c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
-
- /* We only have to emit control bits if we are using streams */
- if (prog->Geom.UsesStreams)
- c.control_data_bits_per_vertex = 2;
- else
- c.control_data_bits_per_vertex = 0;
+ if (brw->gen >= 7) {
+ if (gp->program.OutputType == GL_POINTS) {
+ /* When the output type is points, the geometry shader may output data
+ * to multiple streams, and EndPrimitive() has no effect. So we
+ * configure the hardware to interpret the control data as stream ID.
+ */
+ c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+ /* We only have to emit control bits if we are using streams */
+ if (prog->Geom.UsesStreams)
+ c.control_data_bits_per_vertex = 2;
+ else
+ c.control_data_bits_per_vertex = 0;
+ } else {
+ /* When the output type is triangle_strip or line_strip, EndPrimitive()
+ * may be used to terminate the current strip and start a new one
+ * (similar to primitive restart), and outputting data to multiple
+ * streams is not supported. So we configure the hardware to interpret
+ * the control data as EndPrimitive information (a.k.a. "cut bits").
+ */
+ c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+ /* We only need to output control data if the shader actually calls
+ * EndPrimitive().
+ */
+ c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+ }
} else {
- /* When the output type is triangle_strip or line_strip, EndPrimitive()
- * may be used to terminate the current strip and start a new one
- * (similar to primitive restart), and outputting data to multiple
- * streams is not supported. So we configure the hardware to interpret
- * the control data as EndPrimitive information (a.k.a. "cut bits").
- */
- c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
-
- /* We only need to output control data if the shader actually calls
- * EndPrimitive().
- */
- c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+ /* There are no control data bits in gen6. */
+ c.control_data_bits_per_vertex = 0;
}
c.control_data_header_size_bits =
gp->program.VerticesOut * c.control_data_bits_per_vertex;
@@ -170,7 +175,8 @@ do_gs_prog(struct brw_context *brw,
*
*/
unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
- assert(output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
+ assert(brw->gen == 6 ||
+ output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
c.prog_data.output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
@@ -200,10 +206,20 @@ do_gs_prog(struct brw_context *brw,
* the above figures are all worst-case, and most of them scale with the
* number of output vertices. So we'll just calculate the amount of space
* we need, and if it's too large, fail to compile.
+ *
+ * The above is for gen7+ where we have a single URB entry that will hold
+ * all the output. In gen6, we will have to allocate URB entries for every
+ * vertex we emit, so our URB entries only need to be large enough to hold
+ * a single vertex. Also, gen6 does not have a control data header.
*/
- unsigned output_size_bytes =
- c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
- output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+ unsigned output_size_bytes;
+ if (brw->gen >= 7) {
+ output_size_bytes =
+ c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+ output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+ } else {
+ output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
+ }
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
* which comes before the control header.
@@ -212,11 +228,20 @@ do_gs_prog(struct brw_context *brw,
output_size_bytes += 32;
assert(output_size_bytes >= 1);
- if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
+ int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ if (brw->gen == 6)
+ max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ if (output_size_bytes > max_output_size_bytes)
return false;
- /* URB entry sizes are stored as a multiple of 64 bytes. */
- c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+ /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
+ * a multiple of 128 bytes in gen6.
+ */
+ if (brw->gen >= 7)
+ c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+ else
+ c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
c.prog_data.output_topology =
get_hw_prim_for_gl_prim(gp->program.OutputType);