summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2015-08-05 09:16:59 -0700
committer: Kenneth Graunke <[email protected]> 2015-09-23 11:00:00 -0700
commit: df31c1850d14729e27513ae733110a668f6b6e95 (patch)
tree: 06d123e528dae4a2cb26e8e31dc4603658f3cf18
parent: 542d40d698a698dc656c7a64ddcea07060707555 (diff)
i965/gs: Use new NIR intrinsics.
By performing the vertex counting in NIR, we're able to elide a ton of
useless safety checks around every EmitVertex() call:

total instructions in shared programs: 3952 -> 3720 (-5.87%)
instructions in affected programs: 3491 -> 3259 (-6.65%)
helped: 11
HURT: 0

Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621)
on Haswell GT3e at 1024x768.

This should also make it easier to implement Broadwell's "Static Vertex
Count" feature someday.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c 5
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp 13
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 28
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 28
4 files changed, 48 insertions, 26 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index b47b87e07dd..1d4f6ab2ccd 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -96,6 +96,11 @@ brw_create_nir(struct brw_context *brw,
}
nir_validate_shader(nir);
+ if (stage == MESA_SHADER_GEOMETRY) {
+ nir_lower_gs_intrinsics(nir);
+ nir_validate_shader(nir);
+ }
+
nir_lower_global_vars_to_local(nir);
nir_validate_shader(nir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index 8a8dd571e74..4f4e1e12fab 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg src;
switch (instr->intrinsic) {
- case nir_intrinsic_emit_vertex: {
+ case nir_intrinsic_emit_vertex_with_counter: {
+ this->vertex_count =
+ retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
int stream_id = instr->const_index[0];
gs_emit_vertex(stream_id);
break;
}
- case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ this->vertex_count =
+ retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
gs_end_primitive();
break;
+ case nir_intrinsic_set_vertex_count:
+ this->vertex_count =
+ retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
+ break;
+
case nir_intrinsic_load_invocation_id: {
src_reg invocation_id =
src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index b9694f67787..7a5b945650c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
return;
- /* To ensure that we don't output more vertices than the shader specified
- * using max_vertices, do the logic inside a conditional of the form "if
- * (vertex_count < MAX)"
- */
- unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count,
- src_reg(num_output_vertices), BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
{
/* If we're outputting 32 control data bits or less, then we can wait
* until the shader is over to output them all. Otherwise we need to
@@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
this->current_annotation = "emit vertex: Stream control data bits";
set_stream_control_data_bits(stream_id);
}
-
- this->current_annotation = "emit vertex: increment vertex count";
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
- src_reg(1u)));
}
- emit(BRW_OPCODE_ENDIF);
this->current_annotation = NULL;
}
@@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
void
vec4_gs_visitor::visit(ir_emit_vertex *ir)
{
+ /* To ensure that we don't output more vertices than the shader specified
+ * using max_vertices, do the logic inside a conditional of the form "if
+ * (vertex_count < MAX)"
+ */
+ unsigned num_output_vertices = c->gp->program.VerticesOut;
+ emit(CMP(dst_null_d(), this->vertex_count,
+ src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+ emit(IF(BRW_PREDICATE_NORMAL));
+
gs_emit_vertex(ir->stream_id());
+
+ this->current_annotation = "emit vertex: increment vertex count";
+ emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+ src_reg(1u)));
+
+ emit(BRW_OPCODE_ENDIF);
}
void
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 68e443d38a5..5cfff7b62ba 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog()
void
gen6_gs_visitor::visit(ir_emit_vertex *ir)
{
+ /* To ensure that we don't output more vertices than the shader specified
+ * using max_vertices, do the logic inside a conditional of the form "if
+ * (vertex_count < MAX)"
+ */
+ unsigned num_output_vertices = c->gp->program.VerticesOut;
+ emit(CMP(dst_null_d(), this->vertex_count,
+ src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+ emit(IF(BRW_PREDICATE_NORMAL));
+
gs_emit_vertex(ir->stream_id());
+
+ this->current_annotation = "emit vertex: increment vertex count";
+ emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+ src_reg(1u)));
+
+ emit(BRW_OPCODE_ENDIF);
}
+
void
gen6_gs_visitor::gs_emit_vertex(int stream_id)
{
this->current_annotation = "gen6 emit vertex";
- /* Honor max_vertex layout indication in geometry shader by ignoring any
- * vertices coming after c->gp->program.VerticesOut.
- */
- unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
- BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
+
{
/* Buffer all output slots for this vertex in vertex_output */
for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
}
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, 1u));
-
- /* Update vertex count */
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
}
- emit(BRW_OPCODE_ENDIF);
}
void