summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2018-06-25 19:55:31 -0700
committerIan Romanick <[email protected]>2019-04-18 12:38:05 -0700
commit1711bf6cf2de45e182427130fd0e587c7ce16a70 (patch)
treea32d60943d45deec1afd547f998b0416da028796
parent06d2c116415c0ab163a57ed7f2522342ed43e4d4 (diff)
intel/fs: Generate better code for fsign multiplied by a value
v2: Rebase on v2 changes in previous two commits. v3: Rebase on 85c35885b38 ("nir: Rework nir_src_as_alu_instr to not take a pointer"). shader-db results: Skylake and Broadwell had similar results. (Skylake shown) total instructions in shared programs: 15297100 -> 15282141 (-0.10%) instructions in affected programs: 956685 -> 941726 (-1.56%) helped: 4527 HURT: 0 helped stats (abs) min: 1 max: 221 x̄: 3.30 x̃: 2 helped stats (rel) min: 0.07% max: 10.53% x̄: 1.85% x̃: 1.37% 95% mean confidence interval for instructions value: -3.48 -3.12 95% mean confidence interval for instructions %-change: -1.88% -1.81% Instructions are helped. total cycles in shared programs: 372809551 -> 372597886 (-0.06%) cycles in affected programs: 13645512 -> 13433847 (-1.55%) helped: 4362 HURT: 125 helped stats (abs) min: 1 max: 2088 x̄: 50.73 x̃: 28 helped stats (rel) min: 0.01% max: 28.20% x̄: 2.77% x̃: 2.39% HURT stats (abs) min: 1 max: 1836 x̄: 76.90 x̃: 28 HURT stats (rel) min: <.01% max: 34.36% x̄: 3.03% x̃: 1.42% 95% mean confidence interval for cycles value: -50.98 -43.37 95% mean confidence interval for cycles %-change: -2.67% -2.55% Cycles are helped. total spills in shared programs: 23465 -> 23463 (<.01%) spills in affected programs: 42 -> 40 (-4.76%) helped: 1 HURT: 0 total fills in shared programs: 31766 -> 31763 (<.01%) fills in affected programs: 69 -> 66 (-4.35%) helped: 1 HURT: 0 Haswell total instructions in shared programs: 13839992 -> 13828311 (-0.08%) instructions in affected programs: 712503 -> 700822 (-1.64%) helped: 3477 HURT: 0 helped stats (abs) min: 1 max: 221 x̄: 3.36 x̃: 2 helped stats (rel) min: 0.07% max: 10.64% x̄: 1.96% x̃: 1.52% 95% mean confidence interval for instructions value: -3.58 -3.14 95% mean confidence interval for instructions %-change: -2.01% -1.92% Instructions are helped. 
total cycles in shared programs: 387026330 -> 386872483 (-0.04%) cycles in affected programs: 11329966 -> 11176119 (-1.36%) helped: 3307 HURT: 139 helped stats (abs) min: 2 max: 1776 x̄: 49.58 x̃: 18 helped stats (rel) min: 0.01% max: 20.38% x̄: 2.27% x̃: 1.79% HURT stats (abs) min: 1 max: 2314 x̄: 72.68 x̃: 20 HURT stats (rel) min: <.01% max: 33.99% x̄: 2.28% x̃: 0.96% 95% mean confidence interval for cycles value: -49.31 -39.98 95% mean confidence interval for cycles %-change: -2.15% -2.01% Cycles are helped. LOST: 1 GAINED: 0 Ivy Bridge total instructions in shared programs: 12045602 -> 12038463 (-0.06%) instructions in affected programs: 623837 -> 616698 (-1.14%) helped: 2498 HURT: 0 helped stats (abs) min: 1 max: 39 x̄: 2.86 x̃: 2 helped stats (rel) min: 0.05% max: 10.00% x̄: 1.30% x̃: 1.05% 95% mean confidence interval for instructions value: -2.96 -2.75 95% mean confidence interval for instructions %-change: -1.34% -1.26% Instructions are helped. total cycles in shared programs: 181025675 -> 180891323 (-0.07%) cycles in affected programs: 11329329 -> 11194977 (-1.19%) helped: 2439 HURT: 47 helped stats (abs) min: 1 max: 1565 x̄: 57.06 x̃: 26 helped stats (rel) min: 0.02% max: 24.56% x̄: 2.02% x̃: 1.64% HURT stats (abs) min: 1 max: 1269 x̄: 102.51 x̃: 43 HURT stats (rel) min: 0.11% max: 52.94% x̄: 4.15% x̃: 1.34% 95% mean confidence interval for cycles value: -59.91 -48.17 95% mean confidence interval for cycles %-change: -1.99% -1.82% Cycles are helped. Sandy Bridge, Iron Lake, and GM45 had similar results. (Sandy Bridge shown) total instructions in shared programs: 10896368 -> 10896339 (<.01%) instructions in affected programs: 3767 -> 3738 (-0.77%) helped: 17 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 1.71 x̃: 1 helped stats (rel) min: 0.13% max: 9.52% x̄: 3.58% x̃: 2.73% 95% mean confidence interval for instructions value: -2.27 -1.14 95% mean confidence interval for instructions %-change: -5.14% -2.03% Instructions are helped. 
total cycles in shared programs: 155091109 -> 155091021 (<.01%) cycles in affected programs: 47241 -> 47153 (-0.19%) helped: 15 HURT: 8 helped stats (abs) min: 2 max: 81 x̄: 15.73 x̃: 4 helped stats (rel) min: 0.03% max: 10.59% x̄: 1.55% x̃: 0.71% HURT stats (abs) min: 14 max: 32 x̄: 18.50 x̃: 17 HURT stats (rel) min: 0.32% max: 2.79% x̄: 2.43% x̃: 2.71% 95% mean confidence interval for cycles value: -14.59 6.93 95% mean confidence interval for cycles %-change: -1.41% 1.08% Inconclusive result (value mean confidence interval includes 0). Reviewed-by: Matt Turner <[email protected]> [v2]
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp43
1 files changed, 43 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 930b3844a03..cf044a3f613 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -24,6 +24,7 @@
#include "compiler/glsl/ir.h"
#include "brw_fs.h"
#include "brw_nir.h"
+#include "nir_search_helpers.h"
#include "util/u_math.h"
#include "util/bitscan.h"
@@ -927,6 +928,41 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
}
}
+/**
+ * Determine whether sources of a nir_op_fmul can be fused with a nir_op_fsign
+ *
+ * Checks the operands of a \c nir_op_fmul to determine whether or not
+ * \c emit_fsign could fuse the multiplication with the \c sign() calculation.
+ *
+ * \param instr The multiplication instruction
+ *
+ * \param fsign_src The index into \c instr->src of the source that may or
+ * may not be a \c nir_op_fsign
+ *
+ * \return \c true when all of the rules listed in the function body hold for
+ * \c instr->src[fsign_src]; \c false otherwise.
+ */
+static bool
+can_fuse_fmul_fsign(nir_alu_instr *instr, unsigned fsign_src)
+{
+ assert(instr->op == nir_op_fmul);
+
+ nir_alu_instr *const fsign_instr =
+ nir_src_as_alu_instr(instr->src[fsign_src].src);
+
+ /* Rules:
+ *
+ * 1. instr->src[fsign_src] must be a nir_op_fsign.
+ * 2. The nir_op_fsign can only be used by this multiplication.
+ * 3. The source that is the nir_op_fsign does not have source modifiers.
+ * \c emit_fsign only examines the source modifiers of the source of the
+ * \c nir_op_fsign.
+ *
+ * The nir_op_fsign must also not have the saturate modifier, but steps
+ * have already been taken (in nir_opt_algebraic) to ensure that.
+ *
+ * In the expression below, rule 1 is the NULL and opcode checks, rule 2 is
+ * the is_used_once() call, and rule 3 is the abs / negate checks. The NULL
+ * check comes first so that fsign_instr is never dereferenced when
+ * nir_src_as_alu_instr() returned NULL.
+ */
+ return fsign_instr != NULL && fsign_instr->op == nir_op_fsign &&
+ is_used_once(fsign_instr) &&
+ !instr->src[fsign_src].abs && !instr->src[fsign_src].negate;
+}
+
void
fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
{
@@ -1132,6 +1168,13 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_fmul:
+ for (unsigned i = 0; i < 2; i++) {
+ if (can_fuse_fmul_fsign(instr, i)) {
+ emit_fsign(bld, instr, result, op, i);
+ return;
+ }
+ }
+
inst = bld.MUL(result, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;