diff options
author | Timothy Arceri <[email protected]> | 2019-03-22 13:01:03 +1100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-04-20 03:46:29 +0000 |
commit | 839818332c18a5cf59584ea3114f46aded792465 (patch) | |
tree | 64a42e22ee1b4c705d423939f9812282f6bd1a24 /src/compiler | |
parent | e4e5beee8a4cc0f7a6b27ce1ea1e04d1177442a1 (diff) |
nir/gcm: don't move movs unless we can replace them later with their src
This helps us avoid moving the movs outside of if branches when their
src can't be scalarized.
For example it avoids:
vec4 32 ssa_7 = tex ssa_6 (coord), 0 (texture), 0 (sampler),
if ... {
r0 = imov ssa_7.z
r1 = imov ssa_7.y
r2 = imov ssa_7.x
r3 = imov ssa_7.w
...
} else {
...
if ... {
r0 = imov ssa_7.x
r1 = imov ssa_7.w
...
} else {
r0 = imov ssa_7.z
r1 = imov ssa_7.y
...
}
r2 = imov ssa_7.x
r3 = imov ssa_7.w
}
...
vec4 32 ssa_36 = vec4 r0, r1, r2, r3
Becoming something like:
vec4 32 ssa_7 = tex ssa_6 (coord), 0 (texture), 0 (sampler),
r0 = imov ssa_7.z
r1 = imov ssa_7.y
r2 = imov ssa_7.x
r3 = imov ssa_7.w
if ... {
...
} else {
if ... {
r0 = imov r2
r1 = imov r3
...
} else {
...
}
...
}
While this has a smaller instruction count, it requires more work
for the same result. With more complex examples we can also end up
shuffling the registers around in a way that requires more registers
to use as temps so that we don't overwrite our original values along
the way.
Reviewed-by: Kenneth Graunke <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4636>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir_opt_gcm.c | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c index 9a337c9a97f..dbadd584055 100644 --- a/src/compiler/nir/nir_opt_gcm.c +++ b/src/compiler/nir/nir_opt_gcm.c @@ -109,6 +109,67 @@ gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state, } } +static bool +is_src_scalarizable(nir_src *src) +{ + assert(src->is_ssa); + + nir_instr *src_instr = src->ssa->parent_instr; + switch (src_instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); + + /* ALU operations with output_size == 0 should be scalarized. We + * will also see a bunch of vecN operations from scalarizing ALU + * operations and, since they can easily be copy-propagated, they + * are ok too. + */ + return nir_op_infos[src_alu->op].output_size == 0 || + src_alu->op == nir_op_vec2 || + src_alu->op == nir_op_vec3 || + src_alu->op == nir_op_vec4; + } + + case nir_instr_type_load_const: + /* These are trivially scalarizable */ + return true; + + case nir_instr_type_ssa_undef: + return true; + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr); + + switch (src_intrin->intrinsic) { + case nir_intrinsic_load_deref: { + nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]); + return deref->mode == nir_var_shader_in || + deref->mode == nir_var_uniform || + deref->mode == nir_var_mem_ubo || + deref->mode == nir_var_mem_ssbo || + deref->mode == nir_var_mem_global; + } + + case nir_intrinsic_interp_deref_at_centroid: + case nir_intrinsic_interp_deref_at_sample: + case nir_intrinsic_interp_deref_at_offset: + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_global: + case nir_intrinsic_load_input: + return true; + default: + break; + } + } + + default: + /* We can't scalarize this type of instruction */ + return false; + } +} + /* Walks the instruction list and marks immovable instructions as pinned * * 
This function also serves to initialize the instr->pass_flags field. @@ -138,6 +199,12 @@ gcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state) instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY; break; + case nir_op_mov: + if (!is_src_scalarizable(&(nir_instr_as_alu(instr)->src[0].src))) { + instr->pass_flags = GCM_INSTR_PINNED; + break; + } + default: instr->pass_flags = 0; break; |