diff options
author | Juan A. Suarez Romero <[email protected]> | 2016-12-16 10:24:43 +0100 |
---|---|---|
committer | Juan A. Suarez Romero <[email protected]> | 2017-01-09 10:42:22 +0100 |
commit | c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch) | |
tree | a7c8890b9a6a1532b55170d531310382742cb9de /src/compiler | |
parent | 3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff) |
nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes
So far, inputs_read was a bitmap tracking which vertex input locations
were being used.
In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4)
consumes just one location, like any other smaller attribute. So we mark
the proper bit in inputs_read, and also the same bit in double_inputs_read
if the attribute is a dvec3/dvec4.
But in Vulkan, this is slightly different: a dvec3/dvec4 attribute
consumes two locations, not just one. And hence two bits would be marked
in inputs_read for the same vertex input attribute.
To avoid handling two different situations in NIR, we just choose the
latter one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4
vertex input attribute is marked with two bits in the inputs_read bitmap
(and also in the double_inputs_read), and following attributes are
adjusted accordingly.
As an example, if in our GLSL/IR shader we have three attributes:
layout(location = 0) vec3 attr0;
layout(location = 1) dvec4 attr1;
layout(location = 2) dvec3 attr2;
then in our NIR shader we put attr0 in location 0, attr1 in locations 1
and 2, and attr2 in locations 3 and 4.
Checking carefully, basically we are using slots rather than locations
in NIR.
When emitting the vertices, we do an inverse map to know the
corresponding location for each slot.
v2 (Jason):
- use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR.
v3 (Jason):
- Fix commit log error.
- Use ladder ifs and fix braces.
- elements_double is divisible by 2, don't need DIV_ROUND_UP().
- Use if ladder instead of a switch.
- Add comment about hardware restriction in 64bit vertex attributes.
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/glsl/glsl_to_nir.cpp | 28 | ||||
-rw-r--r-- | src/compiler/nir/nir_gather_info.c | 48 |
2 files changed, 51 insertions, 25 deletions
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 69d4c2b20c6..33f71bf416a 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -129,6 +129,19 @@ private: } /* end of anonymous namespace */ +static void +nir_remap_attributes(nir_shader *shader) +{ + nir_foreach_variable(var, &shader->inputs) { + var->data.location += _mesa_bitcount_64(shader->info->double_inputs_read & + BITFIELD64_MASK(var->data.location)); + } + + /* Once the remap is done, reset double_inputs_read, so later it will have + * which location/slots are doubles */ + shader->info->double_inputs_read = 0; +} + nir_shader * glsl_to_nir(const struct gl_shader_program *shader_prog, gl_shader_stage stage, @@ -146,6 +159,13 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, nir_lower_constant_initializers(shader, (nir_variable_mode)~0); + /* Remap the locations to slots so those requiring two slots will occupy + * two locations. For instance, if we have in the IR code a dvec3 attr0 in + * location 0 and vec4 attr1 in location 1, in NIR attr0 will use + * locations/slots 0 and 1, and attr1 will use location/slot 2 */ + if (shader->stage == MESA_SHADER_VERTEX) + nir_remap_attributes(shader); + shader->info->name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) shader->info->label = ralloc_strdup(shader, shader_prog->Label); @@ -322,6 +342,14 @@ nir_visitor::visit(ir_variable *ir) var->data.compact = ir->type->without_array()->is_scalar(); } } + + /* Mark all the locations that require two slots */ + if (glsl_type_is_dual_slot(glsl_without_array(var->type))) { + for (uint i = 0; i < glsl_count_attribute_slots(var->type, true); i++) { + uint64_t bitfield = BITFIELD64_BIT(var->data.location + i); + shader->info->double_inputs_read |= bitfield; + } + } break; case ir_var_shader_out: diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 07c99497146..35a1ce4dec6 
100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -53,11 +53,6 @@ set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len) else shader->info->inputs_read |= bitfield; - /* double inputs read is only for vertex inputs */ - if (shader->stage == MESA_SHADER_VERTEX && - glsl_type_is_dual_slot(glsl_without_array(var->type))) - shader->info->double_inputs_read |= bitfield; - if (shader->stage == MESA_SHADER_FRAGMENT) { shader->info->fs.uses_sample_qualifier |= var->data.sample; } @@ -83,26 +78,21 @@ static void mark_whole_variable(nir_shader *shader, nir_variable *var) { const struct glsl_type *type = var->type; - bool is_vertex_input = false; if (nir_is_per_vertex_io(var, shader->stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } - if (shader->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in) - is_vertex_input = true; - const unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) - : glsl_count_attribute_slots(type, is_vertex_input); + : glsl_count_attribute_slots(type, false); set_io_mask(shader, var, 0, slots); } static unsigned -get_io_offset(nir_deref_var *deref, bool is_vertex_input) +get_io_offset(nir_deref_var *deref) { unsigned offset = 0; @@ -117,7 +107,7 @@ get_io_offset(nir_deref_var *deref, bool is_vertex_input) return -1; } - offset += glsl_count_attribute_slots(tail->type, is_vertex_input) * + offset += glsl_count_attribute_slots(tail->type, false) * deref_array->base_offset; } /* TODO: we can get the offset for structs here see nir_lower_io() */ @@ -163,12 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref) return false; } - bool is_vertex_input = false; - if (shader->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in) - is_vertex_input = true; - - unsigned offset = get_io_offset(deref, is_vertex_input); + unsigned offset = get_io_offset(deref); if (offset == -1) return false; @@ -184,8 
+169,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref) } /* double element width for double types that takes two slots */ - if (!is_vertex_input && - glsl_type_is_dual_slot(glsl_without_array(type))) { + if (glsl_type_is_dual_slot(glsl_without_array(type))) { elem_width *= 2; } @@ -220,13 +204,27 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: case nir_intrinsic_load_var: - case nir_intrinsic_store_var: - if (instr->variables[0]->var->data.mode == nir_var_shader_in || - instr->variables[0]->var->data.mode == nir_var_shader_out) { + case nir_intrinsic_store_var: { + nir_variable *var = instr->variables[0]->var; + + if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { if (!try_mask_partial_io(shader, instr->variables[0])) - mark_whole_variable(shader, instr->variables[0]->var); + mark_whole_variable(shader, var); + + /* We need to track which input_reads bits correspond to a + * dvec3/dvec4 input attribute */ + if (shader->stage == MESA_SHADER_VERTEX && + var->data.mode == nir_var_shader_in && + glsl_type_is_dual_slot(glsl_without_array(var->type))) { + for (uint i = 0; i < glsl_count_attribute_slots(var->type, false); i++) { + int idx = var->data.location + i; + shader->info->double_inputs_read |= BITFIELD64_BIT(idx); + } + } } break; + } case nir_intrinsic_load_draw_id: case nir_intrinsic_load_front_face: |