From df31c1850d14729e27513ae733110a668f6b6e95 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 5 Aug 2015 09:16:59 -0700 Subject: i965/gs: Use new NIR intrinsics. By performing the vertex counting in NIR, we're able to elide a ton of useless safety checks around every EmitVertex() call: total instructions in shared programs: 3952 -> 3720 (-5.87%) instructions in affected programs: 3491 -> 3259 (-6.65%) helped: 11 HURT: 0 Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621) on Haswell GT3e at 1024x768. This should also make it easier to implement Broadwell's "Static Vertex Count" feature someday. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_nir.c | 5 ++++ src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 13 +++++++++-- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 28 ++++++++++++----------- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 28 ++++++++++++++--------- 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index b47b87e07dd..1d4f6ab2ccd 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -96,6 +96,11 @@ brw_create_nir(struct brw_context *brw, } nir_validate_shader(nir); + if (stage == MESA_SHADER_GEOMETRY) { + nir_lower_gs_intrinsics(nir); + nir_validate_shader(nir); + } + nir_lower_global_vars_to_local(nir); nir_validate_shader(nir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index 8a8dd571e74..4f4e1e12fab 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg src; switch (instr->intrinsic) { - case nir_intrinsic_emit_vertex: { + case nir_intrinsic_emit_vertex_with_counter: { + this->vertex_count = + retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); int stream_id = instr->const_index[0]; gs_emit_vertex(stream_id); break; } - case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: + this->vertex_count = + retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); gs_end_primitive(); break; + case nir_intrinsic_set_vertex_count: + this->vertex_count = + retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); + break; + case nir_intrinsic_load_invocation_id: { src_reg invocation_id = src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index b9694f67787..7a5b945650c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0) return; - /* To ensure that we don't output more vertices than the shader specified - * using max_vertices, do the logic inside a conditional of the form "if - * (vertex_count < MAX)" - */ - unsigned num_output_vertices = c->gp->program.VerticesOut; - emit(CMP(dst_null_d(), this->vertex_count, - src_reg(num_output_vertices), BRW_CONDITIONAL_L)); - emit(IF(BRW_PREDICATE_NORMAL)); { /* If we're outputting 32 control data bits or less, then we can wait * until the shader is over to output them all. Otherwise we need to @@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) this->current_annotation = "emit vertex: Stream control data bits"; set_stream_control_data_bits(stream_id); } - - this->current_annotation = "emit vertex: increment vertex count"; - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, - src_reg(1u))); } - emit(BRW_OPCODE_ENDIF); this->current_annotation = NULL; } @@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) void vec4_gs_visitor::visit(ir_emit_vertex *ir) { + /* To ensure that we don't output more vertices than the shader specified + * using max_vertices, do the logic inside a conditional of the form "if + * (vertex_count < MAX)" + */ + unsigned num_output_vertices = c->gp->program.VerticesOut; + emit(CMP(dst_null_d(), this->vertex_count, + src_reg(num_output_vertices), BRW_CONDITIONAL_L)); + emit(IF(BRW_PREDICATE_NORMAL)); + gs_emit_vertex(ir->stream_id()); + + this->current_annotation = "emit vertex: increment vertex count"; + emit(ADD(dst_reg(this->vertex_count), this->vertex_count, + src_reg(1u))); + + emit(BRW_OPCODE_ENDIF); } void diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 68e443d38a5..5cfff7b62ba 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog() void gen6_gs_visitor::visit(ir_emit_vertex *ir) { + /* To ensure that we don't output more vertices than the shader specified + * using max_vertices, do the logic inside a conditional of the form "if + * (vertex_count < MAX)" + */ + unsigned num_output_vertices = c->gp->program.VerticesOut; + emit(CMP(dst_null_d(), this->vertex_count, + src_reg(num_output_vertices), BRW_CONDITIONAL_L)); + emit(IF(BRW_PREDICATE_NORMAL)); + gs_emit_vertex(ir->stream_id()); + + this->current_annotation = "emit vertex: increment vertex count"; + emit(ADD(dst_reg(this->vertex_count), this->vertex_count, + src_reg(1u))); + + emit(BRW_OPCODE_ENDIF); } + void gen6_gs_visitor::gs_emit_vertex(int stream_id) { this->current_annotation = "gen6 emit vertex"; - /* Honor max_vertex layout indication in geometry shader by ignoring any - * vertices coming after c->gp->program.VerticesOut. - */ - unsigned num_output_vertices = c->gp->program.VerticesOut; - emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices), - BRW_CONDITIONAL_L)); - emit(IF(BRW_PREDICATE_NORMAL)); + { /* Buffer all output slots for this vertex in vertex_output */ for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { @@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) } emit(ADD(dst_reg(this->vertex_output_offset), this->vertex_output_offset, 1u)); - - /* Update vertex count */ - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u)); } - emit(BRW_OPCODE_ENDIF); } void -- cgit v1.2.3