diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 42 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_gs_state.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_gs_state.c | 4 |
4 files changed, 36 insertions, 22 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 5830aa993d5..9e04d813ed4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -590,10 +590,12 @@ struct brw_gs_prog_data int invocations; /** - * True if the thread should be dispatched in DUAL_INSTANCE mode, false if - * it should be dispatched in DUAL_OBJECT mode. + * Dispatch mode, can be any of: + * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT + * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE + * GEN7_GS_DISPATCH_MODE_SINGLE */ - bool dual_instanced_dispatch; + int dispatch_mode; }; /** Number of texture sampler units */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 0b95002ca9f..ad3204fcfe2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -101,10 +101,11 @@ vec4_gs_visitor::setup_payload() { int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES]; - /* If we are in dual instanced mode, then attributes are going to be - * interleaved, so one register contains two attribute slots. + /* If we are in dual instanced or single mode, then attributes are going + * to be interleaved, so one register contains two attribute slots. */ - int attributes_per_reg = c->prog_data.dual_instanced_dispatch ? 2 : 1; + int attributes_per_reg = + c->prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2; /* If a geometry shader tries to read from an input that wasn't written by * the vertex shader, that produces undefined results, but it shouldn't @@ -129,8 +130,7 @@ vec4_gs_visitor::setup_payload() reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg); - lower_attributes_to_hw_regs(attribute_map, - c->prog_data.dual_instanced_dispatch); + lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1); this->first_non_payload_grf = reg; } @@ -640,7 +640,7 @@ brw_gs_emit(struct brw_context *brw, */ if (c->prog_data.invocations <= 1 && likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { - c->prog_data.dual_instanced_dispatch = false; + c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT; vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */); if (v.run()) { @@ -652,15 +652,31 @@ brw_gs_emit(struct brw_context *brw, /* Either we failed to compile in DUAL_OBJECT mode (probably because it * would have required spilling) or DUAL_OBJECT mode is disabled. So fall - * back to DUAL_INSTANCED mode, which consumes fewer registers. + * back to DUAL_INSTANCED or SINGLE mode, which consumes fewer registers. * - * FIXME: In an ideal world we'd fall back to SINGLE mode, which would - * allow us to interleave general purpose registers (resulting in even less - * likelihood of spilling). But at the moment, the vec4 generator and - * visitor classes don't have the infrastructure to interleave general - * purpose registers, so DUAL_INSTANCED is the best we can do. + * FIXME: Single dispatch mode requires that the driver can handle + * interleaving of input registers, but this is already supported (dual + * instance mode has the same requirement). However, to take full advantage + * of single dispatch mode to reduce register pressure we would also need to + * do interleaved outputs, but currently, the vec4 visitor and generator + * classes do not support this, so at the moment register pressure in + * single and dual instance modes is the same. + * + * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 "3DSTATE_GS" + * "If InstanceCount>1, DUAL_OBJECT mode is invalid. Software will likely + * want to use DUAL_INSTANCE mode for higher performance, but SINGLE mode + * is also supported. When InstanceCount=1 (one instance per object) software + * can decide which dispatch mode to use. DUAL_OBJECT mode would likely be + * the best choice for performance, followed by SINGLE mode." + * + * So SINGLE mode is more performant when invocations == 1 and DUAL_INSTANCE + * mode is more performant when invocations > 1. Gen6 only supports + * SINGLE mode. */ - c->prog_data.dual_instanced_dispatch = true; + if (c->prog_data.invocations <= 1) + c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_SINGLE; + else + c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE; vec4_gs_visitor v(brw, c, prog, mem_ctx, false /* no_spills */); if (!v.run()) { diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 6b0fb97b64c..e3e175eb31f 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -145,9 +145,7 @@ upload_gs_state(struct brw_context *brw) GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | ((brw->gs.prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | - (brw->gs.prog_data->dual_instanced_dispatch ? - GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE : - GEN7_GS_DISPATCH_MODE_DUAL_OBJECT) | + brw->gs.prog_data->dispatch_mode | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 5cb5be970bd..927be429747 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -83,9 +83,7 @@ gen8_upload_gs_state(struct brw_context *brw) OUT_BATCH(((brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT) | (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | - (brw->gs.prog_data->dual_instanced_dispatch ? - GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE : - GEN7_GS_DISPATCH_MODE_DUAL_OBJECT) | + brw->gs.prog_data->dispatch_mode | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | |