summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp 91
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.h 1
2 files changed, 92 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 35fabcfe65b..35171ceb5e7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2140,6 +2140,95 @@ vec4_visitor::lower_simd_width()
return progress;
}
+/**
+ * Tells whether inst has one of the FROM/TO_DOUBLE or
+ * PICK/SET_{LOW,HIGH}_32BIT opcodes, the double-precision instructions that
+ * scalarize_df() skips because they operate in Align1 mode.
+ */
+static bool
+is_align1_df(vec4_instruction *inst)
+{
+   return inst->opcode == VEC4_OPCODE_FROM_DOUBLE ||
+          inst->opcode == VEC4_OPCODE_TO_DOUBLE ||
+          inst->opcode == VEC4_OPCODE_PICK_LOW_32BIT ||
+          inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT ||
+          inst->opcode == VEC4_OPCODE_SET_LOW_32BIT ||
+          inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT;
+}
+
+/**
+ * Maps BRW_PREDICATE_NORMAL to the ALIGN16_REPLICATE_* predicate of the
+ * single channel in writemask; other predicates pass through unchanged.
+ */
+static brw_predicate
+scalarize_predicate(brw_predicate predicate, unsigned writemask)
+{
+   if (predicate != BRW_PREDICATE_NORMAL)
+      return predicate;
+   if (writemask == WRITEMASK_X)
+      return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+   if (writemask == WRITEMASK_Y)
+      return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+   if (writemask == WRITEMASK_Z)
+      return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+   if (writemask == WRITEMASK_W)
+      return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+   unreachable("invalid writemask");
+}
+
+/**
+ * Replaces each instruction that has a 64-bit destination or source with
+ * one copy per enabled destination channel; each copy writes just that
+ * channel and reads a replicated source component.  Returns true on change.
+ */
+bool
+vec4_visitor::scalarize_df()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      /* Align1-mode DF opcodes are left as they are */
+      if (is_align1_df(inst))
+         continue;
+
+      /* Look for an 8-byte type on the destination or on a used source */
+      bool has_64bit = type_sz(inst->dst.type) == 8;
+      for (unsigned s = 0; s < 3 && !has_64bit; s++) {
+         if (inst->src[s].file != BAD_FILE)
+            has_64bit = type_sz(inst->src[s].type) == 8;
+      }
+      if (!has_64bit)
+         continue;
+
+      /* Emit one single-channel copy per bit set in the writemask */
+      for (unsigned c = 0; c < 4; c++) {
+         const unsigned mask = 1 << c;
+         if ((inst->dst.writemask & mask) == 0)
+            continue;
+
+         vec4_instruction *copy = new(mem_ctx) vec4_instruction(*inst);
+         copy->dst.writemask = mask;
+         /* Broadcast the component this channel read from each source */
+         for (unsigned s = 0; s < 3; s++) {
+            const unsigned comp = BRW_GET_SWZ(inst->src[s].swizzle, c);
+            copy->src[s].swizzle = BRW_SWIZZLE4(comp, comp, comp, comp);
+         }
+
+         if (inst->predicate != BRW_PREDICATE_NONE)
+            copy->predicate = scalarize_predicate(inst->predicate, mask);
+         inst->insert_before(block, copy);
+      }
+
+      inst->remove(block);
+      progress = true;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
bool
vec4_visitor::run()
{
@@ -2239,6 +2328,8 @@ vec4_visitor::run()
if (failed)
return false;
+ OPT(scalarize_df);
+
setup_payload();
if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3ddec849010..253be677233 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -162,6 +162,7 @@ public:
void convert_to_hw_regs();
bool lower_simd_width();
+ bool scalarize_df();
vec4_instruction *emit(vec4_instruction *inst);