diff options
author | Rhys Perry <[email protected]> | 2019-05-17 11:53:32 +0100 |
---|---|---|
committer | Rhys Perry <[email protected]> | 2019-09-06 15:38:04 +0000 |
commit | 300e758b7c428289909b318bb0df230b22ab5d9b (patch) | |
tree | 240645a39c96396a43b9becdefdb844e086675f4 /src/compiler | |
parent | c6be5cefba7e9480f103c2ce2794a67ed9fa33eb (diff) |
nir/lower_io_to_vector: allow FS outputs to be vectorized
v2: handle dual-source blending
v3: use a higher MAX_SLOTS
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Connor Abbott <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir_lower_io_to_vector.c | 59 | ||||
-rw-r--r-- | src/compiler/shader_enums.h | 1 |
2 files changed, 33 insertions, 27 deletions
```diff
diff --git a/src/compiler/nir/nir_lower_io_to_vector.c b/src/compiler/nir/nir_lower_io_to_vector.c
index abc89fb2f49..896b9504868 100644
--- a/src/compiler/nir/nir_lower_io_to_vector.c
+++ b/src/compiler/nir/nir_lower_io_to_vector.c
@@ -34,6 +34,19 @@
  * when all is said and done.
  */
 
+/* FRAG_RESULT_MAX+1 instead of just FRAG_RESULT_MAX because of how this pass
+ * handles dual source blending */
+#define MAX_SLOTS MAX2(VARYING_SLOT_TESS_MAX, FRAG_RESULT_MAX+1)
+
+static unsigned
+get_slot(const nir_variable *var)
+{
+   /* This handling of dual-source blending might not be correct when more than
+    * one render target is supported, but it seems no driver supports more than
+    * one. */
+   return var->data.location + var->data.index;
+}
+
 static const struct glsl_type *
 resize_array_vec_type(const struct glsl_type *type, unsigned num_components)
 {
@@ -50,10 +63,6 @@ resize_array_vec_type(const struct glsl_type *type, unsigned num_components)
 static bool
 variable_can_rewrite(const nir_variable *var)
 {
-   /* Only touch user defined varyings as these are the only ones we split */
-   if (var->data.location < VARYING_SLOT_VAR0)
-      return false;
-
    /* Skip complex types we don't split in the first place */
    if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
       return false;
@@ -97,22 +106,26 @@ variables_can_merge(nir_shader *shader,
        a->data.interpolation != b->data.interpolation)
       return false;
 
+   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
+       a->data.mode == nir_var_shader_out &&
+       a->data.index != b->data.index)
+      return false;
+
    return true;
 }
 
 static bool
 create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
-                   nir_variable *old_vars[MAX_VARYINGS_INCL_PATCH][4],
-                   nir_variable *new_vars[MAX_VARYINGS_INCL_PATCH][4])
+                   nir_variable *old_vars[MAX_SLOTS][4],
+                   nir_variable *new_vars[MAX_SLOTS][4])
 {
    if (exec_list_is_empty(io_list))
       return false;
 
    nir_foreach_variable(var, io_list) {
       if (variable_can_rewrite(var)) {
-         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
          unsigned frac = var->data.location_frac;
-         old_vars[loc][frac] = var;
+         old_vars[get_slot(var)][frac] = var;
       }
    }
 
@@ -121,7 +134,7 @@ create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
    /* We don't handle combining vars of different type e.g. different array
     * lengths.
     */
-   for (unsigned loc = 0; loc < MAX_VARYINGS_INCL_PATCH; loc++) {
+   for (unsigned loc = 0; loc < MAX_SLOTS; loc++) {
       unsigned frac = 0;
       while (frac < 4) {
          nir_variable *first_var = old_vars[loc][frac];
@@ -197,10 +210,10 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
    nir_metadata_require(impl, nir_metadata_dominance);
 
    nir_shader *shader = impl->function->shader;
-   nir_variable *old_inputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
-   nir_variable *new_inputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
-   nir_variable *old_outputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
-   nir_variable *new_outputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
+   nir_variable *old_inputs[MAX_SLOTS][4] = {{0}};
+   nir_variable *new_inputs[MAX_SLOTS][4] = {{0}};
+   nir_variable *old_outputs[MAX_SLOTS][4] = {{0}};
+   nir_variable *new_outputs[MAX_SLOTS][4] = {{0}};
 
    if (modes & nir_var_shader_in) {
       /* Vertex shaders support overlapping inputs. We don't do those */
@@ -215,11 +228,6 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
    }
 
    if (modes & nir_var_shader_out) {
-      /* Fragment shader outputs are always vec4. You shouldn't have
-       * scalarized them and it doesn't make sense to vectorize them.
-       */
-      assert(b.shader->info.stage != MESA_SHADER_FRAGMENT);
-
       /* If we don't actually merge any variables, remove that bit from modes
        * so we don't bother doing extra non-work.
        */
@@ -256,13 +264,12 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
                break;
 
             if (old_deref->mode == nir_var_shader_out)
-               assert(b.shader->info.stage == MESA_SHADER_TESS_CTRL);
+               assert(b.shader->info.stage == MESA_SHADER_TESS_CTRL ||
+                      b.shader->info.stage == MESA_SHADER_FRAGMENT);
 
             nir_variable *old_var = nir_deref_instr_get_variable(old_deref);
-            if (old_var->data.location < VARYING_SLOT_VAR0)
-               break;
 
-            const unsigned loc = old_var->data.location - VARYING_SLOT_VAR0;
+            const unsigned loc = get_slot(old_var);
             const unsigned old_frac = old_var->data.location_frac;
             nir_variable *new_var = old_deref->mode == nir_var_shader_in ?
                                     new_inputs[loc][old_frac] :
@@ -270,7 +277,7 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
             if (!new_var)
                break;
 
-            assert(new_var->data.location == VARYING_SLOT_VAR0 + loc);
+            assert(get_slot(new_var) == loc);
             const unsigned new_frac = new_var->data.location_frac;
 
             nir_component_mask_t vec4_comp_mask =
@@ -309,16 +316,14 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
                break;
 
             nir_variable *old_var = nir_deref_instr_get_variable(old_deref);
-            if (old_var->data.location < VARYING_SLOT_VAR0)
-               break;
 
-            const unsigned loc = old_var->data.location - VARYING_SLOT_VAR0;
+            const unsigned loc = get_slot(old_var);
             const unsigned old_frac = old_var->data.location_frac;
             nir_variable *new_var = new_outputs[loc][old_frac];
             if (!new_var)
                break;
 
-            assert(new_var->data.location == VARYING_SLOT_VAR0 + loc);
+            assert(get_slot(new_var) == loc);
             const unsigned new_frac = new_var->data.location_frac;
 
             b.cursor = nir_before_instr(&intrin->instr);
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 1a0ec5fb12a..0f402f42741 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -27,6 +27,7 @@
 #define SHADER_ENUMS_H
 
 #include <stdbool.h>
+#include "mesa/main/config.h"
 
 #ifdef __cplusplus
 extern "C" {
```