summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIago Toral Quiroga <[email protected]>2017-10-09 14:17:43 +0200
committerIago Toral Quiroga <[email protected]>2017-10-10 08:59:54 +0200
commit5ec21eb1a0c9fa08978784e58456fdd212aab4d7 (patch)
tree43b11a726fc45d97f35f507e8987fc798479bc03
parent63e6db18c5cdec50688d604a43ddaf86a2238f76 (diff)
i965/tes: account for the fact that dvec3/4 inputs take two slots
When computing the total size of the URB for tessellation evaluation inputs we were not accounting for this, and instead we were always assuming that each input would take a single vec4 slot, which could lead to computing a smaller read size than required. Specifically, this is a problem when the last input is a dvec3/4 such that its XY components are stored in the the second half of a payload register (which can happen if the offset for the input in the URB is not 64-bit aligned because there are 32-bit inputs mixed in) and the ZW components in the first half of the next, as in this case we would fail to account for the extra slot required for the ZW components. Fixes (requires another fix in CTS currently in review): KHR-GL45.enhanced_layouts.varying_locations KHR-GL45.enhanced_layouts.varying_array_locations Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 5b8ccd50bff..425c52c9917 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2665,17 +2665,22 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
/* Arbitrarily only push up to 32 vec4 slots worth of data,
* which is 16 registers (since each holds 2 vec4 slots).
*/
+ unsigned slot_count = 1;
+ if (type_sz(dest.type) == 8 && instr->num_components > 2)
+ slot_count++;
+
const unsigned max_push_slots = 32;
- if (imm_offset < max_push_slots) {
+ if (imm_offset + slot_count <= max_push_slots) {
fs_reg src = fs_reg(ATTR, imm_offset / 2, dest.type);
for (int i = 0; i < instr->num_components; i++) {
unsigned comp = 16 / type_sz(dest.type) * (imm_offset % 2) +
i + first_component;
bld.MOV(offset(dest, bld, i), component(src, comp));
}
+
tes_prog_data->base.urb_read_length =
MAX2(tes_prog_data->base.urb_read_length,
- DIV_ROUND_UP(imm_offset + 1, 2));
+ DIV_ROUND_UP(imm_offset + slot_count, 2));
} else {
/* Replicate the patch handle to all enabled channels */
const fs_reg srcs[] = {