summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ian Romanick <[email protected]> 2017-02-09 15:21:47 +0000
committer: Ian Romanick <[email protected]> 2019-03-01 12:42:14 -0800
commit: cb3e21cd1925c9378b4acb869601bbb011d0de97 (patch)
tree: 877b1c9dfdec0e962bcd52b12d83c910cf6a7810
parent: 8eb36c912955407f9f1d7aba968fe9513cc8e325 (diff)
intel/fs: Use De Morgan's laws to avoid logical-not of a logic result on Gen8+
Instead of emitting ~(a & b), emit (~a | ~b) since logical-not of
operands is free on Gen8+.

v2: Fix swizzles. Fix types for cmod propagation.

v3: Simplify logic for inverting source of inot(ixor(a, b)). Suggested
by Ken.

Skylake and Broadwell had similar results. (Skylake shown)

Skylake
total instructions in shared programs: 15185593 -> 15185583 (<.01%)
instructions in affected programs: 5673 -> 5663 (-0.18%)
helped: 12
HURT: 1
helped stats (abs) min: 1 max: 2 x̄: 1.17 x̃: 1
helped stats (rel) min: 0.30% max: 5.88% x̄: 1.50% x̃: 0.70%
HURT stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4
HURT stats (rel) min: 0.12% max: 0.12% x̄: 0.12% x̃: 0.12%
95% mean confidence interval for instructions value: -1.66 0.13
95% mean confidence interval for instructions %-change: -2.60% -0.15%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 370977726 -> 370964249 (<.01%)
cycles in affected programs: 869987 -> 856510 (-1.55%)
helped: 15
HURT: 2
helped stats (abs) min: 2 max: 6640 x̄: 902.20 x̃: 16
helped stats (rel) min: <.01% max: 4.92% x̄: 1.71% x̃: 1.53%
HURT stats (abs) min: 14 max: 42 x̄: 28.00 x̃: 28
HURT stats (rel) min: 1.08% max: 3.18% x̄: 2.13% x̃: 2.13%
95% mean confidence interval for cycles value: -1654.87 69.34
95% mean confidence interval for cycles %-change: -2.29% -0.23%
Inconclusive result (value mean confidence interval includes 0).

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/intel/compiler/brw_fs_nir.cpp 59
1 files changed, 59 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 23b21f1d680..110473bfa85 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1155,6 +1155,65 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_inot:
if (devinfo->gen >= 8) {
+ nir_alu_instr *const inot_src_instr =
+ nir_src_as_alu_instr(&instr->src[0].src);
+
+ if (inot_src_instr != NULL &&
+ (inot_src_instr->op == nir_op_ior ||
+ inot_src_instr->op == nir_op_ixor ||
+ inot_src_instr->op == nir_op_iand) &&
+ !inot_src_instr->src[0].abs &&
+ !inot_src_instr->src[0].negate &&
+ !inot_src_instr->src[1].abs &&
+ !inot_src_instr->src[1].negate) {
+ /* The sources of the source logical instruction are now the
+ * sources of the instruction that will be generated.
+ */
+ prepare_alu_destination_and_sources(bld, inot_src_instr, op, false);
+ resolve_inot_sources(bld, inot_src_instr, op);
+
+ /* Smash all of the sources and destination to be signed. This
+ * doesn't matter for the operation of the instruction, but cmod
+ * propagation fails on unsigned sources with negation (due to
+ * fs_inst::can_do_cmod returning false).
+ */
+ result.type =
+ brw_type_for_nir_type(devinfo,
+ (nir_alu_type)(nir_type_int |
+ nir_dest_bit_size(instr->dest.dest)));
+ op[0].type =
+ brw_type_for_nir_type(devinfo,
+ (nir_alu_type)(nir_type_int |
+ nir_src_bit_size(inot_src_instr->src[0].src)));
+ op[1].type =
+ brw_type_for_nir_type(devinfo,
+ (nir_alu_type)(nir_type_int |
+ nir_src_bit_size(inot_src_instr->src[1].src)));
+
+ /* For XOR, only invert one of the sources. Arbitrarily choose
+ * the first source.
+ */
+ op[0].negate = !op[0].negate;
+ if (inot_src_instr->op != nir_op_ixor)
+ op[1].negate = !op[1].negate;
+
+ switch (inot_src_instr->op) {
+ case nir_op_ior:
+ bld.AND(result, op[0], op[1]);
+ return;
+
+ case nir_op_iand:
+ bld.OR(result, op[0], op[1]);
+ return;
+
+ case nir_op_ixor:
+ bld.XOR(result, op[0], op[1]);
+ return;
+
+ default:
+ unreachable("impossible opcode");
+ }
+ }
op[0] = resolve_source_modifiers(op[0]);
}
bld.NOT(result, op[0]);