From a5399e8b1cc3e2e12b8aa067e8380d1b088c35ca Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsálvez Date: Thu, 25 Aug 2016 16:05:24 +0200 Subject: i965/fs: lower all non-force_writemask_all DF instructions to SIMD4 on IVB/BYT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware applies the same channel enable signals to both halves of the compressed instruction which will be just wrong under non-uniform control flow. Fix this by splitting those instructions to SIMD4. Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_fs.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/intel/compiler/brw_fs.cpp') diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index cae15542fa1..4dcdc1b46de 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4598,6 +4598,15 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo, */ if (channels_per_grf != (exec_type_size == 8 ? 4 : 8)) max_width = MIN2(max_width, channels_per_grf); + + /* Lower all non-force_writemask_all DF instructions to SIMD4 on IVB/BYT + * because HW applies the same channel enable signals to both halves of + * the compressed instruction which will be just wrong under + * non-uniform control flow. + */ + if (devinfo->gen == 7 && !devinfo->is_haswell && + (exec_type_size == 8 || type_sz(inst->dst.type) == 8)) + max_width = MIN2(max_width, 4); } /* Only power-of-two execution sizes are representable in the instruction -- cgit v1.2.3