From ee5fb8d1ba7f50ed94e1a34fa0f6e15a0588145e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Mon, 20 Oct 2014 23:29:41 -0700 Subject: i965: Generate vs code using scalar backend for BDW+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With everything in place, we can now use the scalar backend compiler for vertex shaders on BDW+. We make scalar vertex shaders the default on BDW+ but add a new vec4vs debug option to force the vec4 backend. No piglit regressions. Performance impact is minimal: I see a ~1.5% improvement on the T-Rex GLBenchmark case, but in general it's in the noise. Some of our internal synthetic, VS-bound benchmarks show great improvement, 20%-40% in some cases, but real-world cases are mostly unaffected. Signed-off-by: Kristian Høgsberg Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 13 ++++++++ src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 19 +++++++++-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 57 +++++++++++++++++++++++++------- src/mesa/drivers/dri/i965/intel_debug.c | 1 + src/mesa/drivers/dri/i965/intel_debug.h | 1 + 6 files changed, 77 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ee9684bf945..860ee22d674 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -559,6 +559,15 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; + if (brw->scalar_vs) { + /* If we're using the scalar backend for vertex shaders, we need to + * configure these accordingly. 
+ */ + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; + } + /* ARB_viewport_array */ if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) { ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS; @@ -754,6 +763,10 @@ brwCreateContext(gl_api api, brw_process_driconf_options(brw); brw_process_intel_debug_variable(brw); + + if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) + brw->scalar_vs = true; + brw_initialize_context_constants(brw); ctx->Const.ResetStrategy = notify_reset diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c3b90133849..1b8f0bbfcac 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1076,6 +1076,7 @@ struct brw_context bool has_pln; bool no_simd8; bool use_rep_send; + bool scalar_vs; /** * Some versions of Gen hardware don't do centroid interpolation correctly diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index b3b1ad79d62..1e5227c6c88 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -74,6 +74,19 @@ brw_shader_precompile(struct gl_context *ctx, return true; } +static inline bool +is_scalar_shader_stage(struct brw_context *brw, int stage) +{ + switch (stage) { + case MESA_SHADER_FRAGMENT: + return true; + case MESA_SHADER_VERTEX: + return brw->scalar_vs; + default: + return false; + } +} + static void brw_lower_packing_builtins(struct brw_context *brw, gl_shader_stage shader_type, @@ -84,7 +97,7 @@ brw_lower_packing_builtins(struct brw_context *brw, | LOWER_PACK_UNORM_2x16 | LOWER_UNPACK_UNORM_2x16; - if (shader_type == MESA_SHADER_FRAGMENT) { + if (is_scalar_shader_stage(brw, shader_type)) { ops |= LOWER_UNPACK_UNORM_4x8 | LOWER_UNPACK_SNORM_4x8 | 
LOWER_PACK_UNORM_4x8 @@ -97,7 +110,7 @@ brw_lower_packing_builtins(struct brw_context *brw, * lowering is needed. For SOA code, the Half2x16 ops must be * scalarized. */ - if (shader_type == MESA_SHADER_FRAGMENT) { + if (is_scalar_shader_stage(brw, shader_type)) { ops |= LOWER_PACK_HALF_2x16_TO_SPLIT | LOWER_UNPACK_HALF_2x16_TO_SPLIT; } @@ -185,7 +198,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) do { progress = false; - if (stage == MESA_SHADER_FRAGMENT) { + if (is_scalar_shader_stage(brw, stage)) { brw_do_channel_expressions(shader->base.ir); brw_do_vector_splitting(shader->base.ir); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 6d5443b58c1..9d080d63fa3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -22,6 +22,7 @@ */ #include "brw_vec4.h" +#include "brw_fs.h" #include "brw_cfg.h" #include "brw_vs.h" #include "brw_dead_control_flow.h" @@ -1767,6 +1768,7 @@ brw_vs_emit(struct brw_context *brw, { bool start_busy = false; double start_time = 0; + const unsigned *assembly = NULL; if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && @@ -1781,23 +1783,54 @@ brw_vs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); - vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); - if (!v.run()) { - if (prog) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); + if (prog && brw->gen >= 8 && brw->scalar_vs) { + fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8); + if (!v.run_vs()) { + if (prog) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + } + + _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", + v.fail_msg); + + return NULL; } - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", - v.fail_msg); + fs_generator g(brw, mem_ctx, (void *) &c->key, 
&prog_data->base.base, + &c->vp->program.Base, v.runtime_check_aads_emit); + if (INTEL_DEBUG & DEBUG_VS) { + char *name = ralloc_asprintf(mem_ctx, "%s vertex shader %d", + prog->Label ? prog->Label : "unnamed", + prog->Name); + g.enable_debug(name); + } + g.generate_code(v.cfg, 8); + assembly = g.get_assembly(final_assembly_size); - return NULL; + if (assembly) + prog_data->base.simd8 = true; + c->base.last_scratch = v.last_scratch; } - const unsigned *assembly = NULL; - vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, - mem_ctx, INTEL_DEBUG & DEBUG_VS); - assembly = g.generate_assembly(v.cfg, final_assembly_size); + if (!assembly) { + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); + if (!v.run()) { + if (prog) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + } + + _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", + v.fail_msg); + + return NULL; + } + + vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, + mem_ctx, INTEL_DEBUG & DEBUG_VS); + assembly = g.generate_assembly(v.cfg, final_assembly_size); + } if (unlikely(brw->perf_debug) && shader) { if (shader->compiled_once) { diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 1dd2b1db4b8..013602c4abe 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -68,6 +68,7 @@ static const struct dri_debug_control debug_control[] = { { "optimizer", DEBUG_OPTIMIZER }, { "ann", DEBUG_ANNOTATION }, { "no8", DEBUG_NO8 }, + { "vec4vs", DEBUG_VEC4VS }, { NULL, 0 } }; diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index df2c86345ef..01b4dcfd3fe 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -63,6 +63,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_OPTIMIZER (1 << 27) #define DEBUG_ANNOTATION (1 << 28) #define DEBUG_NO8 (1 << 29) +#define DEBUG_VEC4VS (1 << 
30) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" -- cgit v1.2.3