diff options
author | Ian Romanick <[email protected]> | 2017-02-09 15:21:47 +0000 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2019-03-01 12:42:14 -0800 |
commit | cb3e21cd1925c9378b4acb869601bbb011d0de97 (patch) | |
tree | 877b1c9dfdec0e962bcd52b12d83c910cf6a7810 /src/intel/compiler/brw_fs_nir.cpp | |
parent | 8eb36c912955407f9f1d7aba968fe9513cc8e325 (diff) |
intel/fs: Use De Morgan's laws to avoid logical-not of a logic result on Gen8+
Instead of emitting ~(a & b), emit (~a | ~b) since logical-not of
operands is free on Gen8+.
v2: Fix swizzles. Fix types for cmod propagation.
v3: Simplify logic for inverting source of inot(ixor(a, b)). Suggested
by Ken.
Skylake and Broadwell had similar results. (Skylake shown)
Skylake
total instructions in shared programs: 15185593 -> 15185583 (<.01%)
instructions in affected programs: 5673 -> 5663 (-0.18%)
helped: 12
HURT: 1
helped stats (abs) min: 1 max: 2 x̄: 1.17 x̃: 1
helped stats (rel) min: 0.30% max: 5.88% x̄: 1.50% x̃: 0.70%
HURT stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4
HURT stats (rel) min: 0.12% max: 0.12% x̄: 0.12% x̃: 0.12%
95% mean confidence interval for instructions value: -1.66 0.13
95% mean confidence interval for instructions %-change: -2.60% -0.15%
Inconclusive result (value mean confidence interval includes 0).
total cycles in shared programs: 370977726 -> 370964249 (<.01%)
cycles in affected programs: 869987 -> 856510 (-1.55%)
helped: 15
HURT: 2
helped stats (abs) min: 2 max: 6640 x̄: 902.20 x̃: 16
helped stats (rel) min: <.01% max: 4.92% x̄: 1.71% x̃: 1.53%
HURT stats (abs) min: 14 max: 42 x̄: 28.00 x̃: 28
HURT stats (rel) min: 1.08% max: 3.18% x̄: 2.13% x̃: 2.13%
95% mean confidence interval for cycles value: -1654.87 69.34
95% mean confidence interval for cycles %-change: -2.29% -0.23%
Inconclusive result (value mean confidence interval includes 0).
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs_nir.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 23b21f1d680..110473bfa85 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1155,6 +1155,65 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_inot: if (devinfo->gen >= 8) { + nir_alu_instr *const inot_src_instr = + nir_src_as_alu_instr(&instr->src[0].src); + + if (inot_src_instr != NULL && + (inot_src_instr->op == nir_op_ior || + inot_src_instr->op == nir_op_ixor || + inot_src_instr->op == nir_op_iand) && + !inot_src_instr->src[0].abs && + !inot_src_instr->src[0].negate && + !inot_src_instr->src[1].abs && + !inot_src_instr->src[1].negate) { + /* The sources of the source logical instruction are now the + * sources of the instruction that will be generated. + */ + prepare_alu_destination_and_sources(bld, inot_src_instr, op, false); + resolve_inot_sources(bld, inot_src_instr, op); + + /* Smash all of the sources and destination to be signed. This + * doesn't matter for the operation of the instruction, but cmod + * propagation fails on unsigned sources with negation (due to + * fs_inst::can_do_cmod returning false). + */ + result.type = + brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_type_int | + nir_dest_bit_size(instr->dest.dest))); + op[0].type = + brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_type_int | + nir_src_bit_size(inot_src_instr->src[0].src))); + op[1].type = + brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_type_int | + nir_src_bit_size(inot_src_instr->src[1].src))); + + /* For XOR, only invert one of the sources. Arbitrarily choose + * the first source. + */ + op[0].negate = !op[0].negate; + if (inot_src_instr->op != nir_op_ixor) + op[1].negate = !op[1].negate; + + switch (inot_src_instr->op) { + case nir_op_ior: + bld.AND(result, op[0], op[1]); + return; + + case nir_op_iand: + bld.OR(result, op[0], op[1]); + return; + + case nir_op_ixor: + bld.XOR(result, op[0], op[1]); + return; + + default: + unreachable("impossible opcode"); + } + } op[0] = resolve_source_modifiers(op[0]); } bld.NOT(result, op[0]); |