intel/compiler/fs/icl: Use dummy masked urb write for tess eval

One cannot write the URB arbitrarily and therefore the message has to be carefully constructed. The clever tricks originate from Kenneth and Jason, I'm just writing the patch. Fixes GPU hangs on ICL with Vulkan CTS. Reviewed-by: Kenneth Graunke <[email protected]> Signed-off-by: Topi Pohjolainen <[email protected]>
author: Topi Pohjolainen <[email protected]> 2019-04-07 07:23:33 -0700
committer: Topi Pohjolainen <[email protected]> 2019-04-25 22:00:43 +0300
commit: ff642fb0e6523dbbe44835b5c75ac14501c9a885 (patch)
tree: efc4f50a3d61a6dff978b5df5b72d0069ac85a8b /src/intel
parent: 4e9592c5fa249df11d9aab665f49f26f3d428d77 (diff)
1 files changed, 50 insertions, 1 deletions
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index af9f803fb68..6509868f1c3 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -821,7 +821,13 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
                            header_size);
 
          fs_inst *inst = abld.emit(opcode, reg_undef, payload);
-         inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
+
+         /* For ICL WA 1805992985 one needs additional write in the end. */
+         if (devinfo->gen == 11 && stage == MESA_SHADER_TESS_EVAL)
+            inst->eot = false;
+         else
+            inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
+
          inst->mlen = length + header_size;
          inst->offset = urb_offset;
          urb_offset = starting_urb_offset + slot + 1;
@@ -857,6 +863,49 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       inst->mlen = 2;
       inst->offset = 1;
       return;
+   } 
+ 
+   /* ICL WA 1805992985:
+    *
+    * ICLLP GPU hangs on one of tessellation vkcts tests with DS not done. The
+    * send cycle, which is a urb write with an eot must be 4 phases long and
+    * all 8 lanes must valid.
+    */
+   if (devinfo->gen == 11 && stage == MESA_SHADER_TESS_EVAL) {
+      fs_reg payload = fs_reg(VGRF, alloc.allocate(6), BRW_REGISTER_TYPE_UD);
+
+      /* Workaround requires all 8 channels (lanes) to be valid. This is
+       * understood to mean they all need to be alive. First trick is to find
+       * a live channel and copy its urb handle for all the other channels to
+       * make sure all handles are valid.
+       */
+      bld.exec_all().MOV(payload, bld.emit_uniformize(urb_handle));
+
+      /* Second trick is to use masked URB write where one can tell the HW to
+       * actually write data only for selected channels even though all are
+       * active.
+       * Third trick is to take advantage of the must-be-zero (MBZ) area in
+       * the very beginning of the URB.
+       *
+       * One masks data to be written only for the first channel and uses
+       * offset zero explicitly to land data to the MBZ area avoiding trashing
+       * any other part of the URB.
+       *
+       * Since the WA says that the write needs to be 4 phases long one uses
+       * 4 slots data. All are explicitly zeros in order to to keep the MBZ
+       * area written as zeros.
+       */
+      bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0x10000u));
+      bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
+      bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
+      bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u));
+      bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u));
+
+      fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
+                                          reg_undef, payload);
+      inst->eot = true;
+      inst->mlen = 6;
+      inst->offset = 0;
    }
 }
author	Topi Pohjolainen <[email protected]>	2019-04-07 07:23:33 -0700
committer	Topi Pohjolainen <[email protected]>	2019-04-25 22:00:43 +0300
commit	ff642fb0e6523dbbe44835b5c75ac14501c9a885 (patch)
tree	efc4f50a3d61a6dff978b5df5b72d0069ac85a8b /src/intel
parent	4e9592c5fa249df11d9aab665f49f26f3d428d77 (diff)