aboutsummaryrefslogtreecommitdiffstats
path: root/src/intel/compiler/brw_vec4_nir.cpp
diff options
context:
space:
mode:
author Ian Romanick <[email protected]> 2019-06-11 12:06:52 -0700
committer Ian Romanick <[email protected]> 2019-07-11 10:20:03 -0700
commit 3a1fdca5ad206f578b0d54a490a8bf6f199c8851 (patch)
tree 939530be6e2b4345b4ab6a9a4079303db7903d1d /src/intel/compiler/brw_vec4_nir.cpp
parent acd7796a079bd4ac0cadb2e6bb73033e79310aaf (diff)
intel/vec4: Try to emit immediate sources for MOV

Per the comment in vec4_visitor::nir_emit_load_const, further
improvement is possible in this area. That case would be more
complicated as I think we'd want to check that all users of the
nir_load_const_instr result intended to use the value as float.

No shader-db changes on any Gen8+ platform as these platforms do not use
the vec4 backend.

v2: Massive rebase on eeebeb211f1 ("intel/vec4: Try emitting non-scalar
immediates"). This commit is about twice as helpful since
b04beaf41d2 ("intel/vec4: Try both sources as candidates for being
immediates").

Haswell and Ivy Bridge had similar results. (Haswell shown)
total instructions in shared programs: 13478598 -> 13474068 (-0.03%)
instructions in affected programs: 589452 -> 584922 (-0.77%)
helped: 2773
HURT: 0
helped stats (abs) min: 1 max: 7 x̄: 1.63 x̃: 1
helped stats (rel) min: 0.16% max: 5.66% x̄: 0.96% x̃: 0.83%
95% mean confidence interval for instructions value: -1.67 -1.60
95% mean confidence interval for instructions %-change: -0.98% -0.94%
Instructions are helped.

total cycles in shared programs: 376386916 -> 376369392 (<.01%)
cycles in affected programs: 16871628 -> 16854104 (-0.10%)
helped: 2293
HURT: 523
helped stats (abs) min: 2 max: 812 x̄: 13.80 x̃: 2
helped stats (rel) min: <.01% max: 10.18% x̄: 1.02% x̃: 0.36%
HURT stats (abs) min: 2 max: 316 x̄: 26.99 x̃: 14
HURT stats (rel) min: <.01% max: 19.34% x̄: 2.15% x̃: 1.43%
95% mean confidence interval for cycles value: -7.87 -4.58
95% mean confidence interval for cycles %-change: -0.52% -0.34%
Cycles are helped.

Sandy Bridge
total instructions in shared programs: 10860328 -> 10857675 (-0.02%)
instructions in affected programs: 335907 -> 333254 (-0.79%)
helped: 1639
HURT: 0
helped stats (abs) min: 1 max: 5 x̄: 1.62 x̃: 1
helped stats (rel) min: 0.10% max: 5.26% x̄: 0.86% x̃: 0.70%
95% mean confidence interval for instructions value: -1.67 -1.57
95% mean confidence interval for instructions %-change: -0.89% -0.84%
Instructions are helped.

total cycles in shared programs: 153942720 -> 153934120 (<.01%)
cycles in affected programs: 5604818 -> 5596218 (-0.15%)
helped: 1494
HURT: 97
helped stats (abs) min: 2 max: 256 x̄: 7.84 x̃: 2
helped stats (rel) min: 0.01% max: 6.62% x̄: 0.35% x̃: 0.18%
HURT stats (abs) min: 2 max: 160 x̄: 32.02 x̃: 20
HURT stats (rel) min: 0.02% max: 3.37% x̄: 0.88% x̃: 0.56%
95% mean confidence interval for cycles value: -6.45 -4.36
95% mean confidence interval for cycles %-change: -0.32% -0.23%
Cycles are helped.

Iron Lake and GM45 had similar results. (Iron Lake shown)
total instructions in shared programs: 8139378 -> 8137267 (-0.03%)
instructions in affected programs: 265616 -> 263505 (-0.79%)
helped: 1148
HURT: 0
helped stats (abs) min: 1 max: 5 x̄: 1.84 x̃: 1
helped stats (rel) min: 0.22% max: 4.76% x̄: 0.87% x̃: 0.62%
95% mean confidence interval for instructions value: -1.90 -1.78
95% mean confidence interval for instructions %-change: -0.90% -0.83%
Instructions are helped.

total cycles in shared programs: 188541756 -> 188537540 (<.01%)
cycles in affected programs: 9807004 -> 9802788 (-0.04%)
helped: 1143
HURT: 4
helped stats (abs) min: 2 max: 10 x̄: 3.70 x̃: 2
helped stats (rel) min: <.01% max: 3.01% x̄: 0.13% x̃: 0.06%
HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.18% max: 0.18% x̄: 0.18% x̃: 0.18%
95% mean confidence interval for cycles value: -3.80 -3.55
95% mean confidence interval for cycles %-change: -0.14% -0.12%
Cycles are helped.

Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_vec4_nir.cpp')
-rw-r--r-- src/intel/compiler/brw_vec4_nir.cpp 18
1 files changed, 14 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 157a97bfc03..1155dec946a 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1036,7 +1036,15 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
{
unsigned idx;
- if (nir_src_bit_size(instr->src[1].src) == 32 &&
+ /* MOV should be the only single-source instruction passed to this
+ * function. Any other unary instruction with a constant source should
+ * have been constant-folded away!
+ */
+ assert(nir_op_infos[instr->op].num_inputs > 1 ||
+ instr->op == nir_op_mov);
+
+ if (instr->op != nir_op_mov &&
+ nir_src_bit_size(instr->src[1].src) == 32 &&
nir_src_is_const(instr->src[1].src)) {
idx = 1;
} else if (try_src0_also &&
@@ -1139,10 +1147,11 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
unreachable("Non-32bit type.");
}
- /* The instruction format only allows source 1 to be an immediate value.
- * If the immediate value was source 0, then the sources must be exchanged.
+ /* If the instruction has more than one source, the instruction format only
+ * allows source 1 to be an immediate value. If the immediate value was
+ * source 0, then the sources must be exchanged.
*/
- if (idx == 0) {
+ if (idx == 0 && instr->op != nir_op_mov) {
src_reg tmp = op[0];
op[0] = op[1];
op[1] = tmp;
@@ -1217,6 +1226,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
switch (instr->op) {
case nir_op_mov:
+ try_immediate_source(instr, &op[0], true, devinfo);
inst = emit(MOV(dst, op[0]));
inst->saturate = instr->dest.saturate;
break;