aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/compiler/brw_eu_defines.h4
-rw-r--r--src/intel/compiler/brw_shader.cpp8
-rw-r--r--src/intel/compiler/brw_vec4.cpp12
-rw-r--r--src/intel/compiler/brw_vec4_copy_propagation.cpp4
-rw-r--r--src/intel/compiler/brw_vec4_generator.cpp23
-rw-r--r--src/intel/compiler/brw_vec4_nir.cpp24
-rw-r--r--src/intel/compiler/brw_vec4_reg_allocate.cpp12
7 files changed, 60 insertions, 27 deletions
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index f0b0d5c2a06..13a70f6f6a1 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -447,7 +447,9 @@ enum opcode {
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
- VEC4_OPCODE_FROM_DOUBLE,
+ VEC4_OPCODE_DOUBLE_TO_F32,
+ VEC4_OPCODE_DOUBLE_TO_D32,
+ VEC4_OPCODE_DOUBLE_TO_U32,
VEC4_OPCODE_TO_DOUBLE,
VEC4_OPCODE_PICK_LOW_32BIT,
VEC4_OPCODE_PICK_HIGH_32BIT,
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 73bbc931352..304b4ecf4fa 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -326,8 +326,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
return "pack_bytes";
case VEC4_OPCODE_UNPACK_UNIFORM:
return "unpack_uniform";
- case VEC4_OPCODE_FROM_DOUBLE:
- return "double_to_single";
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ return "double_to_f32";
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ return "double_to_d32";
+ case VEC4_OPCODE_DOUBLE_TO_U32:
+ return "double_to_u32";
case VEC4_OPCODE_TO_DOUBLE:
return "single_to_double";
case VEC4_OPCODE_PICK_LOW_32BIT:
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 386057e3e3c..0b92ba704e5 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -260,7 +260,9 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
{
switch (opcode) {
case SHADER_OPCODE_GEN4_SCRATCH_READ:
- case VEC4_OPCODE_FROM_DOUBLE:
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ case VEC4_OPCODE_DOUBLE_TO_U32:
case VEC4_OPCODE_TO_DOUBLE:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
@@ -521,7 +523,9 @@ vec4_visitor::opt_reduce_swizzle()
break;
case VEC4_OPCODE_TO_DOUBLE:
- case VEC4_OPCODE_FROM_DOUBLE:
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ case VEC4_OPCODE_DOUBLE_TO_U32:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
case VEC4_OPCODE_SET_LOW_32BIT:
@@ -2255,7 +2259,9 @@ static bool
is_align1_df(vec4_instruction *inst)
{
switch (inst->opcode) {
- case VEC4_OPCODE_FROM_DOUBLE:
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ case VEC4_OPCODE_DOUBLE_TO_U32:
case VEC4_OPCODE_TO_DOUBLE:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp
index e7f6f93f8bd..c1ae32a2936 100644
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp
@@ -293,7 +293,9 @@ static bool
is_align1_opcode(unsigned opcode)
{
switch (opcode) {
- case VEC4_OPCODE_FROM_DOUBLE:
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ case VEC4_OPCODE_DOUBLE_TO_U32:
case VEC4_OPCODE_TO_DOUBLE:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 65f3a9a9f00..5be4ef7fd4b 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1940,9 +1940,28 @@ generate_code(struct brw_codegen *p,
break;
}
- case VEC4_OPCODE_FROM_DOUBLE: {
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ case VEC4_OPCODE_DOUBLE_TO_U32: {
assert(type_sz(src[0].type) == 8);
- assert(type_sz(dst.type) == 4);
+ assert(type_sz(dst.type) == 8);
+
+ brw_reg_type dst_type;
+
+ switch (inst->opcode) {
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ dst_type = BRW_REGISTER_TYPE_F;
+ break;
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ dst_type = BRW_REGISTER_TYPE_D;
+ break;
+ case VEC4_OPCODE_DOUBLE_TO_U32:
+ dst_type = BRW_REGISTER_TYPE_UD;
+ break;
+ default:
+ unreachable("Not supported conversion");
+ }
+ dst = retype(dst, dst_type);
brw_set_default_access_mode(p, BRW_ALIGN_1);
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 64371a16de5..9d9ded2b965 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1183,16 +1183,28 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src,
return;
}
+ enum opcode op;
+ switch (dst.type) {
+ case BRW_REGISTER_TYPE_D:
+ op = VEC4_OPCODE_DOUBLE_TO_D32;
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ op = VEC4_OPCODE_DOUBLE_TO_U32;
+ break;
+ case BRW_REGISTER_TYPE_F:
+ op = VEC4_OPCODE_DOUBLE_TO_F32;
+ break;
+ default:
+ unreachable("Unknown conversion");
+ }
+
dst_reg temp = dst_reg(this, glsl_type::dvec4_type);
emit(MOV(temp, src));
-
dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type);
- temp2 = retype(temp2, dst.type);
- emit(VEC4_OPCODE_FROM_DOUBLE, temp2, src_reg(temp))
- ->size_written = 2 * REG_SIZE;
+ emit(op, temp2, src_reg(temp));
- emit(VEC4_OPCODE_PICK_LOW_32BIT, temp2, src_reg(retype(temp2, BRW_REGISTER_TYPE_DF)));
- vec4_instruction *inst = emit(MOV(dst, src_reg(temp2)));
+ emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2));
+ vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type))));
inst->saturate = saturate;
}
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp
index e3b46cc2f7f..a0ba77b867c 100644
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp
@@ -447,18 +447,6 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
no_spill[inst->dst.nr] = true;
- /* FROM_DOUBLE opcodes are setup so that they use a dst register
- * with a size of 2 even if they only produce a single-precison
- * result (this is so that the opcode can use the larger register to
- * produce a 64-bit aligned intermediary result as required by the
- * hardware during the conversion process). This creates a problem for
- * spilling though, because when we attempt to emit a spill for the
- * dst we see a 32-bit destination and emit a scratch write that
- * allocates a single spill register.
- */
- if (inst->opcode == VEC4_OPCODE_FROM_DOUBLE)
- no_spill[inst->dst.nr] = true;
-
/* We can't spill registers that mix 32-bit and 64-bit access (that
* contain 64-bit data that is operated on via 32-bit instructions)
*/