diff options
author | Samuel Iglesias Gonsálvez <[email protected]> | 2019-02-12 15:43:10 +0100 |
---|---|---|
committer | Andres Gomez <[email protected]> | 2019-09-17 23:39:18 +0300 |
commit | 7580707345b7df0f262935c30b43bde16d297e39 (patch) | |
tree | 9652120d350202eebe9253f0bc9c829d0b057da6 | |
parent | 0ac07c7ca7207f3f1388c0450b456ecc578d9c5b (diff) |
nir: mind rounding mode on fadd, fsub, fmul and fma opcodes
According to the Vulkan spec, the new execution modes affect only
correctly rounded SPIR-V instructions, which include fadd, fsub and
fmul.
v2:
- Fix fmul, fsub and fadd round-to-zero definitions; they should use
auxiliary functions to calculate the proper value because Mesa uses
the round-to-nearest-even rounding mode by default (Connor).
v3:
- Do an actual fused multiply-add in ffma (Connor).
v4:
- Simplify fadd and fmul for bit sizes < 64 (Connor).
- Do not use double ffma for 32-bit floats (Connor).
Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Signed-off-by: Andres Gomez <[email protected]>
Reviewed-by: Connor Abbott <[email protected]> [v3]
-rw-r--r-- | src/compiler/nir/nir_constant_expressions.py | 1 | ||||
-rw-r--r-- | src/compiler/nir/nir_opcodes.py | 49 |
2 files changed, 46 insertions, 4 deletions
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 219d91c1cbd..baba66e5d02 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -64,6 +64,7 @@ template = """\ #include "util/rounding.h" /* for _mesa_roundeven */ #include "util/half_float.h" #include "util/double.h" +#include "util/softfloat.h" #include "util/bigmath.h" #include "nir_constant_expressions.h" diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 13f64d78c4f..5a1c8fd2514 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -492,7 +492,16 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr, [4, 4], [src_type, src_type], False, _2src_commutative, final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) -binop("fadd", tfloat, _2src_commutative + associative, "src0 + src1") +binop("fadd", tfloat, _2src_commutative + associative,""" +if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { + if (bit_size == 64) + dst = _mesa_double_add_rtz(src0, src1); + else + dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); +} else { + dst = src0 + src1; +} +""") binop("iadd", tint, _2src_commutative + associative, "src0 + src1") binop("iadd_sat", tint, _2src_commutative, """ src1 > 0 ? @@ -508,10 +517,28 @@ binop("isub_sat", tint, "", """ """) binop("usub_sat", tuint, "", "src0 < src1 ? 
0 : src0 - src1") -binop("fsub", tfloat, "", "src0 - src1") +binop("fsub", tfloat, "", """ +if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { + if (bit_size == 64) + dst = _mesa_double_sub_rtz(src0, src1); + else + dst = _mesa_double_to_float_rtz((double)src0 - (double)src1); +} else { + dst = src0 - src1; +} +""") binop("isub", tint, "", "src0 - src1") -binop("fmul", tfloat, _2src_commutative + associative, "src0 * src1") +binop("fmul", tfloat, _2src_commutative + associative, """ +if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { + if (bit_size == 64) + dst = _mesa_double_mul_rtz(src0, src1); + else + dst = _mesa_double_to_float_rtz((double)src0 * (double)src1); +} else { + dst = src0 * src1; +} +""") # low 32-bits of signed/unsigned integer multiply binop("imul", tint, _2src_commutative + associative, "src0 * src1") @@ -834,7 +861,21 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): [src1_size, src2_size, src3_size], [tuint, tuint, tuint], False, "", const_expr) -triop("ffma", tfloat, _2src_commutative, "src0 * src1 + src2") +triop("ffma", tfloat, _2src_commutative, """ +if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { + if (bit_size == 64) + dst = _mesa_double_fma_rtz(src0, src1, src2); + else if (bit_size == 32) + dst = _mesa_float_fma_rtz(src0, src1, src2); + else + dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); +} else { + if (bit_size == 32) + dst = fmaf(src0, src1, src2); + else + dst = fma(src0, src1, src2); +} +""") triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2") |