summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
author Francisco Jerez <[email protected]> 2016-05-19 12:50:01 +0200
committer Iago Toral Quiroga <[email protected]> 2016-05-23 08:56:57 +0200
commit b46867cd378e5fb135fd060d50c8028d3dac622a (patch)
tree 3ae8eb57917b0af172f02699c7e450a3febbb346 /src/mesa
parent 38b719d6244e831effe93a5d10e932aad2100984 (diff)
i965/fs: do not depend on std140 alignment rules for UBO loads
The previous implementation relied on the std140 alignment rules to avoid handling misalignment in the case where we are loading more than 2 double components from a vector, which requires emitting a second load message. This alternative implementation deals with misalignment and is more flexible going forward.

Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 59
1 file changed, 13 insertions, 46 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index d5ef569536a..b7df28663d5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3585,9 +3585,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir->info.num_ubos - 1);
}
- /* Number of 32-bit slots in the type */
- unsigned type_slots = MAX2(1, type_sz(dest.type) / 4);
-
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset == NULL) {
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
@@ -3605,55 +3602,25 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* we let CSE deal with duplicate loads. Here we see a vector access
* and we have to split it if necessary.
*/
- fs_reg packed_consts = vgrf(glsl_type::float_type);
- packed_consts.type = dest.type;
+ const unsigned type_size = type_sz(dest.type);
+ const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+ for (unsigned c = 0; c < instr->num_components;) {
+ const unsigned base = const_offset->u32[0] + c * type_size;
- unsigned const_offset_aligned = const_offset->u32[0] & ~15;
+ /* Number of usable components in the next 16B-aligned load */
+ const unsigned count = MIN2(instr->num_components - c,
+ (16 - base % 16) / type_size);
- /* A vec4 only contains half of a dvec4, if we need more than 2
- * components of a dvec4 we will have to issue another load for
- * components z and w.
- */
- int num_components;
- if (type_slots == 1)
- num_components = instr->num_components;
- else
- num_components = MIN2(2, instr->num_components);
-
- /* The computation of num_components doesn't take into account
- * misalignment, which should be okay according to std140 vector
- * alignment rules.
- */
- assert(const_offset->u32[0] % 16 +
- type_sz(dest.type) * num_components <= 16);
-
- int remaining_components = instr->num_components;
- while (remaining_components > 0) {
- /* Read the vec4 from a 16-byte aligned offset */
- struct brw_reg const_offset_reg = brw_imm_ud(const_offset_aligned);
bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- retype(packed_consts, BRW_REGISTER_TYPE_F),
- surf_index, const_offset_reg);
-
- const fs_reg consts = byte_offset(packed_consts, (const_offset->u32[0] % 16));
- unsigned dest_offset = instr->num_components - remaining_components;
+ packed_consts, surf_index, brw_imm_ud(base & ~15));
- /* XXX: This doesn't update the sub-16B offset across iterations of
- * the loop, which should work for std140 vector alignment rules.
- */
- assert(dest_offset == 0 || const_offset->u32[0] % 16 == 0);
+ const fs_reg consts =
+ retype(byte_offset(packed_consts, base & 15), dest.type);
- for (int i = 0; i < num_components; i++)
- bld.MOV(offset(dest, bld, i + dest_offset), component(consts, i));
+ for (unsigned d = 0; d < count; d++)
+ bld.MOV(offset(dest, bld, c + d), component(consts, d));
- /* If this is a large enough 64-bit load, we will need to emit
- * another message
- */
- remaining_components -= num_components;
- assert(remaining_components == 0 ||
- (remaining_components <= 2 && type_slots == 2));
- num_components = remaining_components;
- const_offset_aligned += 16;
+ c += count;
}
}
break;