summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 17
1 files changed, 15 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 409367ed988..05283f54112 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1278,8 +1278,21 @@ fs_generator::generate_ddy(const fs_inst *inst,
const uint32_t type_size = type_sz(src.type);
if (inst->opcode == FS_OPCODE_DDY_FINE) {
- /* produce accurate derivatives */
- if (devinfo->gen >= 11) {
+ /* produce accurate derivatives.
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU)
+ * "Register Region Restrictions", Section "1. Special Restrictions":
+ *
+ * "In Align16 mode, the channel selects and channel enables apply to
+ * a pair of half-floats, because these parameters are defined for
+ * DWord elements ONLY. This is applicable when both source and
+ * destination are half-floats."
+ *
+ * So for half-float operations we use the Gen11+ Align1 path. CHV
+ * inherits its FP16 hardware from SKL, so it is not affected.
+ */
+ if (devinfo->gen >= 11 ||
+ (devinfo->is_broadwell && src.type == BRW_REGISTER_TYPE_HF)) {
src = stride(src, 0, 2, 1);
struct brw_reg src_0 = byte_offset(src, 0 * type_size);
struct brw_reg src_2 = byte_offset(src, 2 * type_size);