summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Iago Toral Quiroga <[email protected]> 2016-07-01 09:01:56 +0200
committer: Samuel Iglesias Gonsálvez <[email protected]> 2017-01-03 11:26:51 +0100
commit: b76f2206f550c37835d4e19eea1588caa0211b85 (patch)
tree: 0002a6c3bee3f2a31c2b2ab775b1b272dc87ca0f
parent: 5fe8d567d8dadeb2b77addd73762f6bde4acfac2 (diff)
i965/vec4: fix store output for 64-bit types
We need to shuffle the data before it is written to the URB. Also,
dvec3/4 need two vec4 slots.

v2: use byte_offset() instead of offset().

Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 27
1 files changed, 25 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 91e1d36714f..065e3170f10 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -435,12 +435,35 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
int varying = instr->const_index[0] + const_offset->u32[0];
- src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
- instr->num_components);
+ bool is_64bit = nir_src_bit_size(instr->src[0]) == 64;
+ if (is_64bit) {
+ src_reg data;
+ src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF,
+ instr->num_components);
+ data = src_reg(this, glsl_type::dvec4_type);
+ shuffle_64bit_data(dst_reg(data), src, true);
+ src = retype(data, BRW_REGISTER_TYPE_F);
+ } else {
+ src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+ instr->num_components);
+ }
unsigned c = nir_intrinsic_component(instr);
output_reg[varying][c] = dst_reg(src);
output_num_components[varying][c] = instr->num_components;
+
+ unsigned num_components = instr->num_components;
+ if (is_64bit)
+ num_components *= 2;
+
+ output_reg[varying][c] = dst_reg(src);
+ output_num_components[varying][c] = MIN2(4, num_components);
+
+ if (is_64bit && num_components > 4) {
+ assert(num_components <= 8);
+ output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE);
+ output_num_components[varying + 1][c] = num_components - 4;
+ }
break;
}