diff options
author | Jason Ekstrand <[email protected]> | 2019-04-11 14:57:12 -0500 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2019-07-02 16:15:25 +0000 |
commit | fa869f45c8d758f9a7d2cdf1fc5c55f093be4c02 (patch) | |
tree | e53507af9ef9e73dff21d90d09f25e7edae668dc /src | |
parent | 2b79a9e5a5235bab8f9a64293f3d20193b430ff1 (diff) |
intel/fs: Use nir_lower_interpolation on gen11+
On gen11, they removed the PLN instruction so we have to emit a pile of
MAD to emulate it. We may as well do that in NIR so we can optimize and
later schedule it.
Shader-db results on Ice Lake:
total instructions in shared programs: 17145644 -> 16556440 (-3.44%)
instructions in affected programs: 11507454 -> 10918250 (-5.12%)
helped: 35763
HURT: 42085
helped stats (abs) min: 1 max: 140 x̄: 19.09 x̃: 18
helped stats (rel) min: 0.04% max: 37.93% x̄: 15.40% x̃: 14.49%
HURT stats (abs) min: 1 max: 248 x̄: 2.22 x̃: 2
HURT stats (rel) min: 0.05% max: 50.00% x̄: 5.00% x̃: 2.47%
95% mean confidence interval for instructions value: -7.67 -7.47
95% mean confidence interval for instructions %-change: -4.46% -4.29%
Instructions are helped.
total loops in shared programs: 4370 -> 4370 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0
total cycles in shared programs: 360624645 -> 368220857 (2.11%)
cycles in affected programs: 269631244 -> 277227456 (2.82%)
helped: 15583
HURT: 65874
helped stats (abs) min: 1 max: 28561 x̄: 78.45 x̃: 32
helped stats (rel) min: <.01% max: 67.81% x̄: 5.38% x̃: 2.44%
HURT stats (abs) min: 1 max: 238638 x̄: 133.87 x̃: 20
HURT stats (rel) min: <.01% max: 306.25% x̄: 5.81% x̃: 3.97%
95% mean confidence interval for cycles value: 67.42 119.09
95% mean confidence interval for cycles %-change: 3.61% 3.73%
Cycles are HURT.
total spills in shared programs: 8943 -> 8981 (0.42%)
spills in affected programs: 1925 -> 1963 (1.97%)
helped: 44
HURT: 14
total fills in shared programs: 21815 -> 21925 (0.50%)
fills in affected programs: 3511 -> 3621 (3.13%)
helped: 41
HURT: 18
LOST: 70
GAINED: 14
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 46 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 1 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_nir.c | 2 |
4 files changed, 3 insertions, 48 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 36d8191ee46..bd0ac1a0718 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3858,47 +3858,6 @@ fs_visitor::lower_load_payload() } bool -fs_visitor::lower_linterp() -{ - bool progress = false; - - if (devinfo->gen < 11) - return false; - - foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - const fs_builder ibld(this, block, inst); - - if (inst->opcode != FS_OPCODE_LINTERP) - continue; - - fs_reg dwP = component(inst->src[1], 0); - fs_reg dwQ = component(inst->src[1], 1); - fs_reg dwR = component(inst->src[1], 3); - for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 8); i++) { - const fs_builder hbld(ibld.half(i)); - fs_reg dst = half(inst->dst, i); - fs_reg delta_xy = offset(inst->src[0], ibld, i); - hbld.MAD(dst, dwR, half(delta_xy, 0), dwP); - fs_inst *mad = hbld.MAD(dst, dst, half(delta_xy, 1), dwQ); - - /* Propagate conditional mod and saturate from the original - * instruction to the second MAD instruction. - */ - set_saturate(inst->saturate, mad); - set_condmod(inst->conditional_mod, mad); - } - - inst->remove(block); - progress = true; - } - - if (progress) - invalidate_live_intervals(); - - return progress; -} - -bool fs_visitor::lower_integer_multiplication() { bool progress = false; @@ -7095,11 +7054,6 @@ fs_visitor::optimize() OPT(compact_virtual_grfs); } while (progress); - if (OPT(lower_linterp)) { - OPT(opt_copy_propagation); - OPT(dead_code_eliminate); - } - /* Do this after cmod propagation has had every possible opportunity to * propagate results into SEL instructions. 
*/ diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 230f9abbe04..d93fb29d85d 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -163,7 +163,6 @@ public: bool lower_pack(); bool lower_regioning(); bool lower_logical_sends(); - bool lower_linterp(); bool lower_integer_multiplication(); bool lower_minmax(); bool lower_simd_width(); diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index d068d1a51c1..be6a00e8476 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -822,7 +822,7 @@ fs_generator::generate_linterp(fs_inst *inst, struct brw_reg interp = stride(src[1], 0, 1, 0); brw_inst *i[2]; - /* fs_visitor::lower_linterp() will do the lowering to MAD instructions for + /* nir_lower_interpolation() will do the lowering to MAD instructions for * us on gen11+ */ assert(devinfo->gen < 11); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index c96e0302a4c..4dea234dd4b 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -452,6 +452,8 @@ brw_nir_lower_fs_inputs(nir_shader *nir, lower_io_options |= nir_lower_io_force_sample_interpolation; nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options); + if (devinfo->gen >= 11) + nir_lower_interpolation(nir, ~0); /* This pass needs actual constants */ nir_opt_constant_folding(nir); |