summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Iago Toral Quiroga <[email protected]> 2018-05-30 12:14:14 +0200
committer Juan A. Suarez Romero <[email protected]> 2019-04-18 11:05:18 +0200
commit aaae24179ff1007776d2f3a5a813f2c52dc83eba (patch)
tree 7f311bfa8aa6be3b4be8052dac563dd1d1cf6d6c
parent 60c7c6d3ba4ab41eec7f48d6266321e10e2e50df (diff)
intel/compiler: fix ddy for half-float in Broadwell
Broadwell has restrictions that apply to Align16 half-float that make the Align16 implementation of ddy invalid for this platform. Use the gen11 path for this instead, which uses Align1 mode.

The restriction is not present in cherryview, gen9 or gen10, where the Align16 implementation seems to work just fine.

v2:
- Rework the comment in the code, move the PRM citation from the commit message to the comment in the code (Matt)
- Cherryview isn't affected, only Broadwell (Matt)

Reviewed-by: Jason Ekstrand <[email protected]> (v1)
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 17
1 files changed, 15 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 409367ed988..05283f54112 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1278,8 +1278,21 @@ fs_generator::generate_ddy(const fs_inst *inst,
const uint32_t type_size = type_sz(src.type);
if (inst->opcode == FS_OPCODE_DDY_FINE) {
- /* produce accurate derivatives */
- if (devinfo->gen >= 11) {
+ /* produce accurate derivatives.
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU)
+ * "Register Region Restrictions", Section "1. Special Restrictions":
+ *
+ * "In Align16 mode, the channel selects and channel enables apply to
+ * a pair of half-floats, because these parameters are defined for
+ * DWord elements ONLY. This is applicable when both source and
+ * destination are half-floats."
+ *
+ * So for half-float operations we use the Gen11+ Align1 path. CHV
+ * inherits its FP16 hardware from SKL, so it is not affected.
+ */
+ if (devinfo->gen >= 11 ||
+ (devinfo->is_broadwell && src.type == BRW_REGISTER_TYPE_HF)) {
src = stride(src, 0, 2, 1);
struct brw_reg src_0 = byte_offset(src, 0 * type_size);
struct brw_reg src_2 = byte_offset(src, 2 * type_size);