diff options
-rw-r--r-- | src/intel/compiler/brw_fs_lower_regioning.cpp | 39 |
1 files changed, 28 insertions, 11 deletions
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index c60d4700419..a76fd262a10 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -127,20 +127,37 @@ namespace { has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst, unsigned i) { - if (is_unordered(inst) || inst->is_control_source(i)) { + if (is_unordered(inst) || inst->is_control_source(i)) return false; - } else { - const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); - const unsigned src_byte_stride = inst->src[i].stride * - type_sz(inst->src[i].type); - const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; - const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; - return has_dst_aligned_region_restriction(devinfo, inst) && - !is_uniform(inst->src[i]) && - (src_byte_stride != dst_byte_stride || - src_byte_offset != dst_byte_offset); + /* Empirical testing shows that Broadwell has a bug affecting half-float + * MAD instructions when any of its sources has a non-zero offset, such + * as: + * + * mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q }; + * + * We used to generate code like this for SIMD8 executions where we + * used to pack components Y and W of a vector at offset 16B of a SIMD + * register. The problem doesn't occur if the stride of the source is 0. + */ + if (devinfo->gen == 8 && + inst->opcode == BRW_OPCODE_MAD && + inst->src[i].type == BRW_REGISTER_TYPE_HF && + reg_offset(inst->src[i]) % REG_SIZE > 0 && + inst->src[i].stride != 0) { + return true; } + + const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); + const unsigned src_byte_stride = inst->src[i].stride * + type_sz(inst->src[i].type); + const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; + const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; + + return has_dst_aligned_region_restriction(devinfo, inst) && + !is_uniform(inst->src[i]) && + (src_byte_stride != dst_byte_stride || + src_byte_offset != dst_byte_offset); } /* |