diff options
author | Iago Toral Quiroga <[email protected]> | 2016-07-01 09:01:56 +0200 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2017-01-03 11:26:51 +0100 |
commit | b76f2206f550c37835d4e19eea1588caa0211b85 (patch) | |
tree | 0002a6c3bee3f2a31c2b2ab775b1b272dc87ca0f | |
parent | 5fe8d567d8dadeb2b77addd73762f6bde4acfac2 (diff) |
i965/vec4: fix store output for 64-bit types
We need to shuffle the data before it is written to the URB. Also,
dvec3/4 need two vec4 slots.
v2: use byte_offset() instead of offset().
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 27 |
1 file changed, 25 insertions, 2 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 91e1d36714f..065e3170f10 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -435,12 +435,35 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
       int varying = instr->const_index[0] + const_offset->u32[0];
 
-      src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
-                        instr->num_components);
+      bool is_64bit = nir_src_bit_size(instr->src[0]) == 64;
+      if (is_64bit) {
+         src_reg data;
+         src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF,
+                           instr->num_components);
+         data = src_reg(this, glsl_type::dvec4_type);
+         shuffle_64bit_data(dst_reg(data), src, true);
+         src = retype(data, BRW_REGISTER_TYPE_F);
+      } else {
+         src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+                           instr->num_components);
+      }
 
       unsigned c = nir_intrinsic_component(instr);
       output_reg[varying][c] = dst_reg(src);
       output_num_components[varying][c] = instr->num_components;
+
+      unsigned num_components = instr->num_components;
+      if (is_64bit)
+         num_components *= 2;
+
+      output_reg[varying][c] = dst_reg(src);
+      output_num_components[varying][c] = MIN2(4, num_components);
+
+      if (is_64bit && num_components > 4) {
+         assert(num_components <= 8);
+         output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE);
+         output_num_components[varying + 1][c] = num_components - 4;
+      }
       break;
    }
 
```