diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.am | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.h | 2 | ||||
-rwxr-xr-x | src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py | 43 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 16 |
7 files changed, 58 insertions, 28 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 0db5a51e725..a41c8305a80 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -33,6 +33,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \ -I$(top_srcdir)/src/gtest/include \ + -I$(top_srcdir)/src/compiler/nir \ -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/mesa/drivers/dri/common \ $(DEFINES) \ @@ -41,6 +42,10 @@ AM_CFLAGS = \ AM_CXXFLAGS = $(AM_CFLAGS) +brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py + $(MKDIR_GEN) + $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false) + noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la libi965_dri_la_SOURCES = $(i965_FILES) libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 46895882414..2619e4360bc 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -44,6 +44,7 @@ i965_compiler_FILES = \ brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ brw_nir_attribute_workarounds.c \ + brw_nir_trig_workarounds.c \ brw_nir_opt_peephole_ffma.c \ brw_nir_uniforms.cpp \ brw_packed_float.c \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 90b878913b3..bd6314a3e37 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -775,24 +775,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_fsin: - if (!compiler->precise_trig) { - inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); - } else { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); - inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]); - inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); - } + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - if (!compiler->precise_trig) { - inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); - } else { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); - inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]); - inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); - } + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); inst->saturate = instr->dest.saturate; break; diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 1821c0d7d23..932979a7719 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -447,6 +447,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) if (nir->stage == MESA_SHADER_GEOMETRY) OPT(nir_lower_gs_intrinsics); + if (compiler->precise_trig) + OPT(brw_nir_apply_trig_workarounds); + static const nir_lower_tex_options tex_options = { .lower_txp = ~0, }; diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index b10c0838fe0..2711606511d 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -106,6 +106,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir, bool use_legacy_snorm_formula, const uint8_t *attrib_wa_flags); +bool brw_nir_apply_trig_workarounds(nir_shader *nir); + nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, const struct brw_device_info *devinfo, const struct brw_sampler_prog_key_data *key, diff --git a/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py new file mode 100755 index 00000000000..67dab9ab326 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py @@ -0,0 +1,43 @@ +#! /usr/bin/env python +# +# Copyright (C) 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +import nir_algebraic + +# The SIN and COS instructions on Intel hardware can produce values +# slightly outside of the [-1.0, 1.0] range for a small set of values. +# Obviously, this can break everyone's expectations about trig functions. +# +# According to an internal presentation, the COS instruction can produce +# a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One +# suggested workaround is to multiply by 0.99997, scaling down the +# amplitude slightly. Apparently this also minimizes the error function, +# reducing the maximum error from 0.00006 to about 0.00003. + +trig_workarounds = [ + (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)), + (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)), +] + +print '#include "brw_nir.h"' +print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds", + trig_workarounds).render() diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index d9f96c58379..e4e8c38e703 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1101,24 +1101,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fsin: - if (!compiler->precise_trig) { - inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); - } else { - src_reg tmp = src_reg(this, glsl_type::vec4_type); - inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]); - inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); - } + inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - if (!compiler->precise_trig) { - inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); - } else { - src_reg tmp = src_reg(this, glsl_type::vec4_type); - inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]); - inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); - } + inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); inst->saturate = instr->dest.saturate; break; |