diff options
author | Juan A. Suarez Romero <[email protected]> | 2016-12-16 10:24:43 +0100 |
---|---|---|
committer | Juan A. Suarez Romero <[email protected]> | 2017-01-09 10:42:22 +0100 |
commit | c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch) | |
tree | a7c8890b9a6a1532b55170d531310382742cb9de /src/mesa/drivers | |
parent | 3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff) |
nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes
So far, inputs_read was a bitmap tracking which vertex input locations
were being used.
In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4)
consumes just one location, like any other small attribute. So we mark the
proper bit in inputs_read, and also the same bit in double_inputs_read
if the attribute is a dvec3/dvec4.
But in Vulkan, this is slightly different: a dvec3/dvec4 attribute
consumes two locations, not just one. And hence two bits would be marked
in inputs_read for the same vertex input attribute.
To avoid handling two different situations in NIR, we just choose the
latter one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4
vertex input attribute is marked with two bits in the inputs_read bitmap
(and also in the double_inputs_read), and following attributes are
adjusted accordingly.
As an example, if in our GLSL/IR shader we have three attributes:
layout(location = 0) vec3 attr0;
layout(location = 1) dvec4 attr1;
layout(location = 2) dvec3 attr2;
then in our NIR shader we put attr0 in location 0, attr1 in locations 1
and 2, and attr2 in locations 3 and 4.
Checking carefully, basically we are using slots rather than locations
in NIR.
When emitting the vertices, we do an inverse map to know the
corresponding location for each slot.
v2 (Jason):
- use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR.
v3 (Jason):
- Fix commit log error.
- Use ladder ifs and fix braces.
- elements_double is divisible by 2, don't need DIV_ROUND_UP().
- Use if ladder instead of a switch.
- Add comment about hardware restriction in 64bit vertex attributes.
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw_upload.c | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 11 |
6 files changed, 16 insertions, 29 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 57815645924..b7527f2cd9b 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -481,11 +481,16 @@ brw_prepare_vertices(struct brw_context *brw) /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; while (vs_inputs) { - GLuint index = ffsll(vs_inputs) - 1; + GLuint first = ffsll(vs_inputs) - 1; + GLuint index = + first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read & + BITFIELD64_MASK(first)), 2); struct brw_vertex_element *input = &brw->vb.inputs[index]; input->is_dual_slot = brw->gen >= 8 && - (vs_prog_data->double_inputs_read & BITFIELD64_BIT(index)) != 0; - vs_inputs &= ~BITFIELD64_BIT(index); + (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0; + vs_inputs &= ~BITFIELD64_BIT(first); + if (input->is_dual_slot) + vs_inputs &= ~BITFIELD64_BIT(first + 1); brw->vb.enabled[brw->vb.nr_enabled++] = input; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c8a069386dd..03f9c24d151 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -492,19 +492,6 @@ type_size_scalar(const struct glsl_type *type) return 0; } -/* Attribute arrays are loaded as one vec4 per element (or matrix column), - * except for double-precision types, which are loaded as one dvec4. - */ -extern "C" int -type_size_vs_input(const struct glsl_type *type) -{ - if (type->is_double()) { - return type_size_dvec4(type); - } else { - return type_size_vec4(type); - } -} - /** * Create a MOV to read the timestamp register. 
* diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 3775e6c4a09..cea38d86237 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -36,8 +36,7 @@ fs_reg * fs_visitor::emit_vs_system_value(int location) { fs_reg *reg = new(this->mem_ctx) - fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info->inputs_read) + - _mesa_bitcount_64(nir->info->double_inputs_read)), + fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info->inputs_read), BRW_REGISTER_TYPE_D); struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(prog_data); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 3645f48777a..2d2fce28eef 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -113,9 +113,7 @@ remap_vs_attrs(nir_block *block, shader_info *nir_info) int attr = intrin->const_index[0]; int slot = _mesa_bitcount_64(nir_info->inputs_read & BITFIELD64_MASK(attr)); - int dslot = _mesa_bitcount_64(nir_info->double_inputs_read & - BITFIELD64_MASK(attr)); - intrin->const_index[0] = 4 * (slot + dslot); + intrin->const_index[0] = 4 * slot; } } return true; @@ -268,7 +266,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir, * loaded as one vec4 or dvec4 per element (or matrix column), depending on * whether it is a double-precision type or not. 
*/ - nir_lower_io(nir, nir_var_shader_in, type_size_vs_input, 0); + nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0); /* This pass needs actual constants */ nir_opt_constant_folding(nir); diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index f713d47b40e..ecb41189806 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -34,7 +34,6 @@ extern "C" { int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); int type_size_dvec4(const struct glsl_type *type); -int type_size_vs_input(const struct glsl_type *type); static inline int type_size_scalar_bytes(const struct glsl_type *type) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index b5e846d7cf5..5ddbe580d5a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2737,7 +2737,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, ((1 << shader->info->cull_distance_array_size) - 1) << shader->info->clip_distance_array_size; - unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read); + unsigned nr_attribute_slots = _mesa_bitcount_64(prog_data->inputs_read); /* gl_VertexID and gl_InstanceID are system values, but arrive via an * incoming vertex attribute. So, add an extra slot. 
@@ -2747,18 +2747,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) | BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) { - nr_attributes++; + nr_attribute_slots++; } /* gl_DrawID has its very own vec4 */ if (shader->info->system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) { - nr_attributes++; + nr_attribute_slots++; } - unsigned nr_attribute_slots = - nr_attributes + - _mesa_bitcount_64(shader->info->double_inputs_read); + unsigned nr_attributes = nr_attribute_slots - + DIV_ROUND_UP(_mesa_bitcount_64(shader->info->double_inputs_read), 2); /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in |