diff options
author | Kenneth Graunke <[email protected]> | 2016-07-17 18:44:58 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2016-07-20 11:01:11 -0700 |
commit | a2dc11a7818c04d8dc0324e8fcba98d60baea529 (patch) | |
tree | 3530cdae3475ed421f1530ec56f5ba4d23f1f7eb | |
parent | 048a56c1fc8f66e74645cc5ff4b4eb3d5ee471a8 (diff) |
i965: Move load_interpolated_input/barycentric_* intrinsics to the top.
Currently, i965 interpolates all FS inputs at the top of the program.
This has advantages and disadvantages, but I'd like to keep that policy
while reworking this code. We can consider changing it independently.
The next patch will make the compiler generate PLN instructions "on the
fly", when it encounters an input load intrinsic, rather than doing it
for all inputs at the start of the program.
To emulate this behavior, we introduce an ugly pass to move all NIR
load_interpolated_input and payload-based (not interpolator message)
load_barycentric_* intrinsics to the shader's start block.
This helps avoid regressions in shader-db for cases such as:
    if (...) {
        ...load some input...
    } else {
        ...load that same input...
    }
which CSE can't handle, because there's no dominance relationship
between the two loads. Because the start block dominates all others,
we can CSE all inputs and emit PLNs exactly once, as we did before.
Ideally, global value numbering would eliminate these redundant loads,
while not forcing them all the way to the start block. When that lands,
we should consider dropping this hacky pass.
Again, this pass currently does nothing, as i965 doesn't generate these
intrinsics yet. But it will shortly, and I figured I'd separate this
code as it's relatively self-contained.
v2: Dramatically simplify pass - instead of creating new instructions,
just remove/re-insert their list nodes (suggested by Jason Ekstrand).
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Chris Forbes <[email protected]> [v1]
Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3aaf843bc97..fc91bbcfa46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6403,6 +6403,69 @@ computed_depth_mode(const nir_shader *shader) } /** + * Move load_interpolated_input with simple (payload-based) barycentric modes + * to the top of the program so we don't emit multiple PLNs for the same input. + * + * This works around CSE not being able to handle non-dominating cases + * such as: + * + * if (...) { + * interpolate input + * } else { + * interpolate the same exact input + * } + * + * This should be replaced by global value numbering someday. + */ +void +move_interpolation_to_top(nir_shader *nir) +{ + nir_foreach_function(f, nir) { + if (!f->impl) + continue; + + nir_block *top = nir_start_block(f->impl); + + nir_foreach_block(block, f->impl) { + if (block == top) + continue; + + nir_foreach_instr_reverse_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + break; + case nir_intrinsic_load_interpolated_input: { + nir_intrinsic_instr *bary_intrinsic = + nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr); + nir_intrinsic_op op = bary_intrinsic->intrinsic; + + /* Leave interpolateAtSample/Offset() where it is. 
*/ + if (op == nir_intrinsic_load_barycentric_at_sample || + op == nir_intrinsic_load_barycentric_at_offset) + continue; + } + default: + continue; + } + + exec_node_remove(&instr->node); + exec_list_push_head(&top->instr_list, &instr->node); + instr->block = top; + } + } + nir_metadata_preserve(f->impl, (nir_metadata) + ((unsigned) nir_metadata_block_index | + (unsigned) nir_metadata_dominance)); + } +} + +/** * Apply default interpolation settings to FS inputs which don't specify any. */ static void @@ -6509,6 +6572,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, brw_nir_lower_fs_outputs(shader); if (!key->multisample_fbo) NIR_PASS_V(shader, demote_sample_qualifiers); + NIR_PASS_V(shader, move_interpolation_to_top); shader = brw_postprocess_nir(shader, compiler->devinfo, true); /* key->alpha_test_func means simulating alpha testing via discards, |