diff options
author | Juan A. Suarez Romero <[email protected]> | 2016-12-16 10:24:43 +0100 |
---|---|---|
committer | Juan A. Suarez Romero <[email protected]> | 2017-01-09 10:42:22 +0100 |
commit | c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch) | |
tree | a7c8890b9a6a1532b55170d531310382742cb9de /src/intel/vulkan | |
parent | 3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff) |
nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes
So far, inputs_read was a bitmap tracking which vertex input locations
were being used.
In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4)
consumes just one location, like any other small attribute. So we mark the
proper bit in inputs_read, and also the same bit in double_inputs_read
if the attribute is a dvec3/dvec4.
But in Vulkan, this is slightly different: a dvec3/dvec4 attribute
consumes two locations, not just one. And hence two bits would be marked
in inputs_read for the same vertex input attribute.
To avoid handling two different situations in NIR, we just choose the
latter one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4
vertex input attribute is marked with two bits in the inputs_read bitmap
(and also in the double_inputs_read), and following attributes are
adjusted accordingly.
As an example, if in our GLSL/IR shader we have three attributes:
layout(location = 0) vec3 attr0;
layout(location = 1) dvec4 attr1;
layout(location = 2) dvec3 attr2;
then in our NIR shader we put attr0 in location 0, attr1 in locations 1
and 2, and attr2 in locations 3 and 4.
Checking carefully, basically we are using slots rather than locations
in NIR.
When emitting the vertices, we do an inverse map to know the
corresponding location for each slot.
v2 (Jason):
- use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR.
v3 (Jason):
- Fix commit log error.
- Use ladder ifs and fix braces.
- elements_double is divisible by 2, don't need DIV_ROUND_UP().
- Use if ladder instead of a switch.
- Add comment about hardware restriction in 64bit vertex attributes.
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/vulkan')
-rw-r--r-- | src/intel/vulkan/genX_pipeline.c | 46 |
1 files changed, 40 insertions, 6 deletions
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 9ff84cd2921..c3feb115bb2 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -42,9 +42,35 @@ vertex_element_comp_control(enum isl_format format, unsigned comp) default: unreachable("Invalid component"); } + /* + * Take in account hardware restrictions when dealing with 64-bit floats. + * + * From Broadwell spec, command reference structures, page 586: + * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, + * 64-bit components are stored * in the URB without any conversion. In + * this case, vertex elements must be written as 128 or 256 bits, with + * VFCOMP_STORE_0 being used to pad the output as required. E.g., if + * R64_PASSTHRU is used to copy a 64-bit Red component into the URB, + * Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3 + * set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or + * Components 1-3 must be specified as VFCOMP_STORE_0 in order to output + * a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires + * Component 3 to be specified as VFCOMP_STORE_0 in order to output a + * 256-bit vertex element." 
+ */ if (bits) { return VFCOMP_STORE_SRC; - } else if (comp < 3) { + } else if (comp >= 2 && + !isl_format_layouts[format].channels.b.bits && + isl_format_layouts[format].channels.r.type == ISL_RAW) { + /* When emitting 64-bit attributes, we need to write either 128 or 256 + * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and + * VFCOMP_STORE_0 to pad the written chunk */ + return VFCOMP_NOSTORE; + } else if (comp < 3 || + isl_format_layouts[format].channels.r.type == ISL_RAW) { + /* Note we need to pad with value 0, not 1, due hardware restrictions + * (see comment above) */ return VFCOMP_STORE_0; } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || isl_format_layouts[format].channels.r.type == ISL_SINT) { @@ -64,8 +90,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, /* Pull inputs_read out of the VS prog data */ const uint64_t inputs_read = vs_prog_data->inputs_read; + const uint64_t double_inputs_read = vs_prog_data->double_inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0; + const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0; #if GEN_GEN >= 8 /* On BDW+, we only need to allocate space for base ids. 
Setting up @@ -83,13 +111,16 @@ emit_vertex_input(struct anv_pipeline *pipeline, vs_prog_data->uses_baseinstance; #endif - uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; - if (elem_count == 0) + uint32_t elem_count = __builtin_popcount(elements) - + __builtin_popcount(elements_double) / 2; + + uint32_t total_elems = elem_count + needs_svgs_elem; + if (total_elems == 0) return; uint32_t *p; - const uint32_t num_dwords = 1 + elem_count * 2; + const uint32_t num_dwords = 1 + total_elems * 2; p = anv_batch_emitn(&pipeline->batch, num_dwords, GENX(3DSTATE_VERTEX_ELEMENTS)); memset(p + 1, 0, (num_dwords - 1) * 4); @@ -107,7 +138,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, if ((elements & (1 << desc->location)) == 0) continue; /* Binding unused */ - uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); + uint32_t slot = + __builtin_popcount(elements & ((1 << desc->location) - 1)) - + DIV_ROUND_UP(__builtin_popcount(elements_double & + ((1 << desc->location) -1)), 2); struct GENX(VERTEX_ELEMENT_STATE) element = { .VertexBufferIndex = desc->binding, @@ -137,7 +171,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, #endif } - const uint32_t id_slot = __builtin_popcount(elements); + const uint32_t id_slot = elem_count; if (needs_svgs_elem) { /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: * "Within a VERTEX_ELEMENT_STATE structure, if a Component |