summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2015-04-07 15:15:09 -0700
committer: Kenneth Graunke <[email protected]> 2015-04-11 12:35:33 -0700
commit: 89c1feb78d010bc457f5d02be84c955eebf3549f (patch)
tree: 558425e1ff2c21b99d66ba218f143b6c2759540b
parent: b3e286c4575bf6af343c1a03471fd876cdfb5c43 (diff)
i965: Create NIR during LinkShader() and ProgramStringNotify().
Previously, we translated into NIR and did all the optimizations and lowering as part of running fs_visitor. This meant that we did all of that work twice for fragment shaders - once for SIMD8, and again for SIMD16. We also had to redo it every time we hit a state-based recompile. We now generate NIR once at link time. ARB programs don't have linking, so we instead generate it at ProgramStringNotify time. Mesa's fixed-function vertex program handling doesn't bother to inform the driver about new programs at all (which is rather mean), so we generate NIR at the last minute, if it hasn't happened already. shader-db runs ~9.4% faster on my i7-5600U, with a release build. v2: Check NirOptions != NULL in ProgramStringNotify(). Don't bother using _mesa_program_enum_to_shader_stage as we already know it. Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c | 213
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.h | 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_program.c | 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp | 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 17
-rw-r--r-- src/mesa/main/mtypes.h | 2
-rw-r--r-- src/mesa/program/program.c | 5
9 files changed, 257 insertions, 176 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 498d5a7c013..6d4659f0920 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -77,6 +77,7 @@ i965_FILES = \
brw_misc_state.c \
brw_multisample_state.h \
brw_nir.h \
+ brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_object_purgeable.c \
brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 4ee92a8208c..d6508fc707a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,175 +28,10 @@
#include "brw_fs.h"
#include "brw_nir.h"
-static void
-nir_optimize(nir_shader *nir)
-{
- bool progress;
- do {
- progress = false;
- nir_lower_vars_to_ssa(nir);
- nir_validate_shader(nir);
- nir_lower_alu_to_scalar(nir);
- nir_validate_shader(nir);
- progress |= nir_copy_prop(nir);
- nir_validate_shader(nir);
- nir_lower_phis_to_scalar(nir);
- nir_validate_shader(nir);
- progress |= nir_copy_prop(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_dce(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_cse(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_peephole_select(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_algebraic(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_constant_folding(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_remove_phis(nir);
- nir_validate_shader(nir);
- } while (progress);
-}
-
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
- int *count = (int *) state;
- nir_foreach_instr(block, instr) {
- *count = *count + 1;
- }
- return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
- int count = 0;
- nir_foreach_overload(nir, overload) {
- if (!overload->impl)
- continue;
- nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
- }
- return count;
-}
-
void
fs_visitor::emit_nir_code()
{
- const nir_shader_compiler_options *options =
- ctx->Const.ShaderCompilerOptions[stage].NirOptions;
-
- nir_shader *nir;
- /* First, lower the GLSL IR or Mesa IR to NIR */
- if (shader_prog) {
- nir = glsl_to_nir(&shader->base, options);
- } else {
- nir = prog_to_nir(prog, options);
- nir_convert_to_ssa(nir); /* turn registers into SSA */
- }
- nir_validate_shader(nir);
-
- nir_lower_global_vars_to_local(nir);
- nir_validate_shader(nir);
-
- nir_lower_tex_projector(nir);
- nir_validate_shader(nir);
-
- nir_normalize_cubemap_coords(nir);
- nir_validate_shader(nir);
-
- nir_split_var_copies(nir);
- nir_validate_shader(nir);
-
- nir_optimize(nir);
-
- /* Lower a bunch of stuff */
- nir_lower_var_copies(nir);
- nir_validate_shader(nir);
-
- /* Get rid of split copies */
- nir_optimize(nir);
-
- if (shader_prog) {
- nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
- &nir->num_direct_uniforms,
- &nir->num_uniforms);
- } else {
- /* ARB programs generally create a giant array of "uniform" data, and allow
- * indirect addressing without any boundaries. In the absence of bounds
- * analysis, it's all or nothing. num_direct_uniforms is only useful when
- * we have some direct and some indirect access; it doesn't matter here.
- */
- nir->num_direct_uniforms = 0;
- }
- nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
- nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
-
- nir_lower_io(nir);
- nir_validate_shader(nir);
-
- nir_remove_dead_variables(nir);
- nir_validate_shader(nir);
-
- if (shader_prog) {
- nir_lower_samplers(nir, shader_prog, stage);
- nir_validate_shader(nir);
- }
-
- nir_lower_system_values(nir);
- nir_validate_shader(nir);
-
- nir_lower_atomics(nir);
- nir_validate_shader(nir);
-
- nir_optimize(nir);
-
- if (brw->gen >= 6) {
- /* Try and fuse multiply-adds */
- nir_opt_peephole_ffma(nir);
- nir_validate_shader(nir);
- }
-
- nir_opt_algebraic_late(nir);
- nir_validate_shader(nir);
-
- nir_lower_locals_to_regs(nir);
- nir_validate_shader(nir);
-
- nir_lower_to_source_mods(nir);
- nir_validate_shader(nir);
- nir_copy_prop(nir);
- nir_validate_shader(nir);
- nir_opt_dce(nir);
- nir_validate_shader(nir);
-
- if (unlikely(debug_enabled)) {
- fprintf(stderr, "NIR (SSA form) for %s shader:\n", stage_name);
- nir_print_shader(nir, stderr);
- }
-
- if (dispatch_width == 8) {
- static GLuint msg_id = 0;
- _mesa_gl_debug(&brw->ctx, &msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_OTHER,
- MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s NIR shader: %d inst\n",
- stage_abbrev,
- count_nir_instrs(nir));
- }
-
- nir_convert_from_ssa(nir);
- nir_validate_shader(nir);
-
- /* This is the last pass we run before we start emitting stuff. It
- * determines when we need to insert boolean resolves on Gen <= 5. We
- * run it last because it stashes data in instr->pass_flags and we don't
- * want that to be squashed by other NIR passes.
- */
- if (brw->gen <= 5)
- brw_nir_analyze_boolean_resolves(nir);
+ nir_shader *nir = prog->nir;
/* emit the arrays used for inputs and outputs - load/store intrinsics will
* be converted to reads/writes of these arrays
@@ -232,13 +67,6 @@ fs_visitor::emit_nir_code()
assert(overload->impl);
nir_emit_impl(overload->impl);
}
-
- if (unlikely(debug_enabled)) {
- fprintf(stderr, "NIR (final form) for %s shader:\n", stage_name);
- nir_print_shader(nir, stderr);
- }
-
- ralloc_free(nir);
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
new file mode 100644
index 00000000000..de4d7aafd44
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/nir/glsl_to_nir.h"
+#include "program/prog_to_nir.h"
+/**
+ * Run the lowering and optimization passes below over \p nir in a loop,
+ * stopping after the first full iteration in which none of the
+ * progress-reporting passes returned true.
+ *
+ * The three lowering passes (vars_to_ssa, alu_to_scalar, phis_to_scalar)
+ * run on every iteration but do not feed the progress flag.
+ * nir_validate_shader() is called after every single pass.
+ *
+ * \param nir  shader to process; passed to every pass unchanged.
+ */
+static void
+nir_optimize(nir_shader *nir)
+{
+ bool progress;
+ do {
+ progress = false; /* reset per iteration; only the |= passes set it */
+ nir_lower_vars_to_ssa(nir);
+ nir_validate_shader(nir);
+ nir_lower_alu_to_scalar(nir);
+ nir_validate_shader(nir);
+ progress |= nir_copy_prop(nir);
+ nir_validate_shader(nir);
+ nir_lower_phis_to_scalar(nir);
+ nir_validate_shader(nir);
+ progress |= nir_copy_prop(nir); /* second copy-prop run, after phi scalarization */
+ nir_validate_shader(nir);
+ progress |= nir_opt_dce(nir);
+ nir_validate_shader(nir);
+ progress |= nir_opt_cse(nir);
+ nir_validate_shader(nir);
+ progress |= nir_opt_peephole_select(nir);
+ nir_validate_shader(nir);
+ progress |= nir_opt_algebraic(nir);
+ nir_validate_shader(nir);
+ progress |= nir_opt_constant_folding(nir);
+ nir_validate_shader(nir);
+ progress |= nir_opt_remove_phis(nir);
+ nir_validate_shader(nir);
+ } while (progress); /* loop again if any tracked pass changed the shader */
+}
+
+/**
+ * Per-block callback used with nir_foreach_block(): bumps the int counter
+ * that \p state points to once for every instruction in \p block.
+ *
+ * \param block  block whose instructions are counted.
+ * \param state  pointer to the running int total.
+ * \return always true.
+ */
+static bool
+count_nir_instrs_in_block(nir_block *block, void *state)
+{
+   int *total = state;
+
+   nir_foreach_instr(block, instr) {
+      (*total)++;
+   }
+
+   return true;
+}
+
+/**
+ * Count the instructions in \p nir.
+ *
+ * Walks every overload in the shader and, for each one that has an
+ * implementation, adds up the instructions of all of its blocks.
+ *
+ * \param nir  shader to inspect.
+ * \return total instruction count over all implemented overloads.
+ */
+static int
+count_nir_instrs(nir_shader *nir)
+{
+   int num_instrs = 0;
+
+   nir_foreach_overload(nir, overload) {
+      /* Only overloads with an implementation are walked. */
+      if (overload->impl) {
+         nir_foreach_block(overload->impl, count_nir_instrs_in_block,
+                           &num_instrs);
+      }
+   }
+
+   return num_instrs;
+}
+/**
+ * Build the NIR shader for one stage and run the driver's lowering and
+ * optimization pipeline over it.
+ *
+ * The shader is translated with glsl_to_nir() from
+ * shader_prog->_LinkedShaders[stage] when \p shader_prog is non-NULL, and
+ * with prog_to_nir() followed by nir_convert_to_ssa() otherwise.  After
+ * the passes below it is converted out of SSA, gets
+ * brw_nir_analyze_boolean_resolves() on gen <= 5, and is swept.
+ *
+ * Side effects: emits one GL debug notification with the instruction
+ * count, and, when INTEL_DEBUG has the flag for \p stage set, prints the
+ * shader to stderr twice (SSA form and final form).
+ *
+ * NOTE(review): NirOptions is passed to the translators without a NULL
+ * check here; the callers visible in this patch test it before calling.
+ *
+ * \param brw          driver context; supplies the GL context and gen.
+ * \param shader_prog  linked GLSL program, or NULL to translate \p prog.
+ * \param prog         program handed to prog_to_nir() in the NULL case.
+ * \param stage        selects the compiler options, linked shader and
+ *                     debug flag.
+ * \return the new nir_shader.  Callers in this patch store it in
+ *         gl_program::nir, which _mesa_delete_program() ralloc_free()s.
+ */
+nir_shader *
+brw_create_nir(struct brw_context *brw,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ gl_shader_stage stage)
+{
+ struct gl_context *ctx = &brw->ctx;
+ const nir_shader_compiler_options *options =
+ ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+ struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
+ bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+ nir_shader *nir;
+
+ /* First, lower the GLSL IR or Mesa IR to NIR */
+ if (shader_prog) {
+ nir = glsl_to_nir(shader, options);
+ } else {
+ nir = prog_to_nir(prog, options);
+ nir_convert_to_ssa(nir); /* turn registers into SSA */
+ }
+ nir_validate_shader(nir);
+
+ nir_lower_global_vars_to_local(nir);
+ nir_validate_shader(nir);
+
+ nir_lower_tex_projector(nir);
+ nir_validate_shader(nir);
+
+ nir_normalize_cubemap_coords(nir);
+ nir_validate_shader(nir);
+
+ nir_split_var_copies(nir);
+ nir_validate_shader(nir);
+
+ nir_optimize(nir);
+
+ /* Lower a bunch of stuff */
+ nir_lower_var_copies(nir);
+ nir_validate_shader(nir);
+
+ /* Get rid of split copies */
+ nir_optimize(nir);
+
+ if (shader_prog) {
+ nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
+ &nir->num_direct_uniforms,
+ &nir->num_uniforms);
+ } else {
+ /* ARB programs generally create a giant array of "uniform" data, and allow
+ * indirect addressing without any boundaries. In the absence of bounds
+ * analysis, it's all or nothing. num_direct_uniforms is only useful when
+ * we have some direct and some indirect access; it doesn't matter here.
+ */
+ nir->num_direct_uniforms = 0;
+ }
+ nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
+ nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
+
+ nir_lower_io(nir);
+ nir_validate_shader(nir);
+
+ nir_remove_dead_variables(nir);
+ nir_validate_shader(nir);
+
+ if (shader_prog) { /* sampler lowering needs the linked program, so GLSL only */
+ nir_lower_samplers(nir, shader_prog, stage);
+ nir_validate_shader(nir);
+ }
+
+ nir_lower_system_values(nir);
+ nir_validate_shader(nir);
+
+ nir_lower_atomics(nir);
+ nir_validate_shader(nir);
+
+ nir_optimize(nir);
+
+ if (brw->gen >= 6) {
+ /* Try and fuse multiply-adds */
+ nir_opt_peephole_ffma(nir);
+ nir_validate_shader(nir);
+ }
+
+ nir_opt_algebraic_late(nir);
+ nir_validate_shader(nir);
+
+ nir_lower_locals_to_regs(nir);
+ nir_validate_shader(nir);
+
+ nir_lower_to_source_mods(nir);
+ nir_validate_shader(nir);
+ nir_copy_prop(nir); /* single cleanup run; result is not looped on */
+ nir_validate_shader(nir);
+ nir_opt_dce(nir);
+ nir_validate_shader(nir);
+
+ if (unlikely(debug_enabled)) {
+ fprintf(stderr, "NIR (SSA form) for %s shader:\n",
+ _mesa_shader_stage_to_string(stage));
+ nir_print_shader(nir, stderr);
+ }
+
+ static GLuint msg_id = 0; /* static: one id shared by every call, passed by address below */
+ _mesa_gl_debug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION,
+ "%s NIR shader: %d inst\n",
+ _mesa_shader_stage_to_abbrev(stage),
+ count_nir_instrs(nir));
+
+ nir_convert_from_ssa(nir);
+ nir_validate_shader(nir);
+
+ /* This is the last pass we run before we start emitting stuff. It
+ * determines when we need to insert boolean resolves on Gen <= 5. We
+ * run it last because it stashes data in instr->pass_flags and we don't
+ * want that to be squashed by other NIR passes.
+ */
+ if (brw->gen <= 5)
+ brw_nir_analyze_boolean_resolves(nir);
+
+ nir_sweep(nir); /* NOTE(review): presumably releases memory the shader no longer references - confirm */
+
+ if (unlikely(debug_enabled)) {
+ fprintf(stderr, "NIR (final form) for %s shader:\n",
+ _mesa_shader_stage_to_string(stage));
+ nir_print_shader(nir, stderr);
+ }
+
+ return nir;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 27782a3c807..313110997bf 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -23,6 +23,7 @@
#pragma once
+#include "brw_context.h"
#include "glsl/nir/nir.h"
#ifdef __cplusplus
@@ -73,6 +74,11 @@ enum {
void brw_nir_analyze_boolean_resolves(nir_shader *nir);
+nir_shader *brw_create_nir(struct brw_context *brw,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ gl_shader_stage stage);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 8920c34b029..9e27c2aa974 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -43,6 +43,7 @@
#include "brw_context.h"
#include "brw_shader.h"
+#include "brw_nir.h"
#include "brw_wm.h"
#include "intel_batchbuffer.h"
@@ -141,6 +142,10 @@ brwProgramStringNotify(struct gl_context *ctx,
brw_add_texrect_params(prog);
+ if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
+ prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT);
+ }
+
brw_fs_precompile(ctx, NULL, prog);
break;
}
@@ -163,6 +168,10 @@ brwProgramStringNotify(struct gl_context *ctx,
brw_add_texrect_params(prog);
+ if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
+ prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX);
+ }
+
brw_vs_precompile(ctx, NULL, prog);
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 87000776f1d..335a8007e12 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -27,6 +27,7 @@
#include "brw_gs.h"
#include "brw_fs.h"
#include "brw_cfg.h"
+#include "brw_nir.h"
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
#include "main/shaderapi.h"
@@ -229,6 +230,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct gl_shader *shader = shProg->_LinkedShaders[stage];
+ const struct gl_shader_compiler_options *options =
+ &ctx->Const.ShaderCompilerOptions[stage];
if (!shader)
continue;
@@ -277,6 +280,9 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
brw_add_texrect_params(prog);
+ if (options->NirOptions)
+ prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
+
_mesa_reference_program(ctx, &prog, NULL);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ef2fd40b503..c4c77b28760 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -25,6 +25,7 @@
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_vs.h"
+#include "brw_nir.h"
#include "brw_vec4_live_variables.h"
#include "brw_dead_control_flow.h"
@@ -1809,6 +1810,8 @@ brw_vs_emit(struct brw_context *brw,
bool start_busy = false;
double start_time = 0;
const unsigned *assembly = NULL;
+ bool use_nir =
+ brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL;
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
@@ -1823,9 +1826,17 @@ brw_vs_emit(struct brw_context *brw,
if (unlikely(INTEL_DEBUG & DEBUG_VS))
brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
- if (brw->scalar_vs &&
- (prog ||
- brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions)) {
+ if (use_nir && !c->vp->program.Base.nir) {
+ /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but
+ * Mesa's fixed-function vertex program handling doesn't notify the driver
+ * at all. Just do it here, at the last minute, even though it's lame.
+ */
+ assert(c->vp->program.Base.Id == 0 && prog == NULL);
+ c->vp->program.Base.nir =
+ brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
+ }
+
+ if (brw->scalar_vs && (prog || use_nir)) {
fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
if (!v.run_vs()) {
if (prog) {
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 61840282cdb..5d726b422f5 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2087,6 +2087,8 @@ struct gl_program
struct prog_instruction *Instructions;
+ struct nir_shader *nir;
+
GLbitfield64 InputsRead; /**< Bitmask of which input regs are read */
GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 3c214d5e361..4f28e2a3b54 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -37,6 +37,7 @@
#include "prog_cache.h"
#include "prog_parameter.h"
#include "prog_instruction.h"
+#include "util/ralloc.h"
/**
@@ -380,6 +381,10 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
_mesa_free_parameter_list(prog->Parameters);
}
+ if (prog->nir) {
+ ralloc_free(prog->nir);
+ }
+
free(prog);
}