summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2015-07-01 17:01:24 -0700
committerKenneth Graunke <[email protected]>2015-07-10 18:21:15 -0700
commit0edb084f9d0444c451a08fd2ed7daee2eb8a6f4a (patch)
tree793d2323e6812f17686e346b40c58bbe08180993
parent0fae4e451bc60de1138729d20e03100e93cc6f38 (diff)
i965/gs: Move vertex_count != 0 check up a level; skip one caller.
Paul's original code had emit_control_data_bits() skip the URB write if vertex_count was 0. This meant wrapping every control data write in a conditional write. We accumulate control data bits in a single UD (32-bit) register. For simple shaders that don't emit many vertices, the control data header will be <= 32-bits long, so we only need to write it once at the end of the shader. For shaders with larger headers, we write out batches of control data bits at EmitVertex(), when (vertex_count * bits_per_vertex) % 32 == 0. On the first EmitVertex() call, the above expression will evaluate to true simply because vertex_count == 0. But we want to avoid emitting the control data bits, because we haven't accumulated 32-bits worth yet. In other words, the vertex_count != 0 check is really only necessary in the EmitVertex() batching case, not the end-of-thread case. This saves a CMP/IF/ENDIF in every shader that uses EndPrimitive() or multiple streams. The only downside is that a shader which emits no vertices at all will execute an additional URB write---but such shaders are pointless and not worth optimizing. Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp14
1 files changed, 8 insertions, 6 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 2f948ee73c0..55408eb0b0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -348,11 +348,6 @@ vec4_gs_visitor::emit_control_data_bits()
if (c->control_data_header_size_bits > 128)
urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
- /* If vertex_count is 0, then no control data bits have been accumulated
- * yet, so we should do nothing.
- */
- emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ));
- emit(IF(BRW_PREDICATE_NORMAL));
{
/* If we are using either channel masks or a per-slot offset, then we
* need to figure out which DWORD we are trying to write to, using the
@@ -431,7 +426,6 @@ vec4_gs_visitor::emit_control_data_bits()
inst->base_mrf = base_mrf;
inst->mlen = 2;
}
- emit(BRW_OPCODE_ENDIF);
}
void
@@ -531,9 +525,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
emit(AND(dst_null_d(), this->vertex_count,
(uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
+
emit(IF(BRW_PREDICATE_NORMAL));
{
+ /* If vertex_count is 0, then no control data bits have been
+ * accumulated yet, so we skip emitting them.
+ */
+ emit(CMP(dst_null_d(), this->vertex_count, 0u,
+ BRW_CONDITIONAL_NEQ));
+ emit(IF(BRW_PREDICATE_NORMAL));
emit_control_data_bits();
+ emit(BRW_OPCODE_ENDIF);
/* Reset control_data_bits to 0 so we can start accumulating a new
* batch.