diff options
author | Ian Romanick <[email protected]> | 2019-06-06 11:21:15 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2019-07-11 10:20:03 -0700 |
commit | 365b45d571121e5c30cbed1b8580b8fc06aec564 (patch) | |
tree | 21cfb2da33ccf92069217ee366cba0b80b6c6897 /src/intel | |
parent | 6f6bc842f6672c596edd9c446ff9a1bd0668de0b (diff) |
intel/vec4: Try to emit a single load for multiple 3-src instruction operands
If a 3-source instruction uses immediate values 1.0 and -1.0, just load
1.0 into a register. Use the negation source modifier to get -1.0.
This has trivial impact now, but it prevents a few thousand regressions
on vec4 platforms with "nir/algebraic: Recognize open-coded flrp(-1, 1,
a) and flrp(1, -1, a)"
All Gen6 and Gen7 platforms had similar results. (Haswell shown)
total instructions in shared programs: 13487412 -> 13487406 (<.01%)
instructions in affected programs: 541 -> 535 (-1.11%)
helped: 6
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.36% max: 2.08% x̄: 1.65% x̃: 1.80%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -2.33% -0.97%
Instructions are helped.
total cycles in shared programs: 376402564 -> 376402500 (<.01%)
cycles in affected programs: 10348 -> 10284 (-0.62%)
helped: 10
HURT: 1
helped stats (abs) min: 2 max: 26 x̄: 7.00 x̃: 2
helped stats (rel) min: 0.13% max: 2.05% x̄: 0.89% x̃: 0.79%
HURT stats (abs) min: 6 max: 6 x̄: 6.00 x̃: 6
HURT stats (rel) min: 0.29% max: 0.29% x̄: 0.29% x̃: 0.29%
95% mean confidence interval for cycles value: -11.72 0.08
95% mean confidence interval for cycles %-change: -1.20% -0.36%
Inconclusive result (value mean confidence interval includes 0).
No shader-db changes on any other Intel platform.
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/compiler/brw_vec4.h | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_nir.cpp | 38 |
2 files changed, 36 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index c8804b90f47..a8e0049bf9c 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -243,7 +243,7 @@ public: src_reg emit_uniformize(const src_reg &src); /** Fix all float operands of a 3-source instruction. */ - void fix_float_operands(src_reg op[3]); + void fix_float_operands(src_reg op[3], nir_alu_instr *instr); src_reg fix_3src_operand(const src_reg &src); src_reg resolve_source_modifiers(const src_reg &src); diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index be7e7d79979..7d0394bdc67 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1131,10 +1131,42 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op, } void -vec4_visitor::fix_float_operands(src_reg op[3]) +vec4_visitor::fix_float_operands(src_reg op[3], nir_alu_instr *instr) { bool fixed[3] = { false, false, false }; + for (unsigned i = 0; i < 2; i++) { + if (!nir_src_is_const(instr->src[i].src)) + continue; + + for (unsigned j = i + 1; j < 3; j++) { + if (fixed[j]) + continue; + + if (!nir_src_is_const(instr->src[j].src)) + continue; + + if (nir_alu_srcs_equal(instr, instr, i, j)) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + + op[j] = op[i]; + + fixed[i] = true; + fixed[j] = true; + } else if (nir_alu_srcs_negative_equal(instr, instr, i, j)) { + if (!fixed[i]) + op[i] = fix_3src_operand(op[i]); + + op[j] = op[i]; + op[j].negate = !op[j].negate; + + fixed[i] = true; + fixed[j] = true; + } + } + } + for (unsigned i = 0; i < 3; i++) { if (!fixed[i]) op[i] = fix_3src_operand(op[i]); @@ -1927,14 +1959,14 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst = emit(ADD(dst, src_reg(mul_dst), op[2])); inst->saturate = instr->dest.saturate; } else { - fix_float_operands(op); + fix_float_operands(op, instr); inst = emit(MAD(dst, op[2], op[1], op[0])); inst->saturate = instr->dest.saturate; } break; case nir_op_flrp: - fix_float_operands(op); + fix_float_operands(op, instr); inst = emit(LRP(dst, op[2], op[1], op[0])); inst->saturate = instr->dest.saturate; break; |