intel/compiler/fs: Set up subgroup invocation as a system value

Subgroup invocation is computed using a vector immediate and some dispatch-aware arithmetic. Unfortunately, due to the vector arithmetic, and the fact that it's frequently read 16-wide, it's not something that can easily be CSEd by the back-end compiler. There are a few different possible approaches to this problem: 1) Emit the code to calculate the subgroup invocation on-the-fly and trust NIR to do the CSE. This is what we were doing. 2) Add a back-end instruction for the subgroup ID. This has the advantage of helping the back-end compiler with CSE but has the downside of very poor scheduling for the calculation because it has to be emitted in the back-end. 3) Emit the calculation at the top of the program and re-use the result. This gets rid of the CSE problem but comes at the cost of an extra live register. This commit switches us from 1) to 3). We choose to store the subgroup invocation values as a W type to reduce the impact of the extra live register. Trusting NIR and using 1) was fine but we're soon going to want to use the subgroup invocation value for other things in the back-end compiler and this makes it much easier to do without having to worry about CSE problems. Reviewed-by: Iago Toral Quiroga <[email protected]>
author: Jason Ekstrand <[email protected]> 2017-08-31 21:56:43 -0700
committer: Jason Ekstrand <[email protected]> 2017-11-07 10:37:52 -0800
commit: 6132992cdb858268af0e985727d80e4140be389c (patch)
tree: faf89c223c5cbb174c0bf97aea7f9ed950c9eff1 /src/intel/compiler
parent: 295605c930270a5b90f847b79474507d8b0c9e9c (diff)
1 files changed, 21 insertions, 13 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 39e7e692874..35fae180285 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -231,6 +231,24 @@ fs_visitor::nir_emit_system_values()
       nir_system_values[i] = fs_reg();
    }
 
+   /* Always emit SUBGROUP_INVOCATION.  Dead code will clean it up if we
+    * never end up using it.
+    */
+   {
+      const fs_builder abld = bld.annotate("gl_SubgroupInvocation", NULL);
+      fs_reg &reg = nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
+      reg = abld.vgrf(BRW_REGISTER_TYPE_W);
+
+      const fs_builder allbld8 = abld.group(8, 0).exec_all();
+      allbld8.MOV(reg, brw_imm_v(0x76543210));
+      if (dispatch_width > 8)
+         allbld8.ADD(byte_offset(reg, 16), reg, brw_imm_uw(8u));
+      if (dispatch_width > 16) {
+         const fs_builder allbld16 = abld.group(16, 0).exec_all();
+         allbld16.ADD(byte_offset(reg, 32), reg, brw_imm_uw(16u));
+      }
+   }
+
    nir_foreach_function(function, nir) {
       assert(strcmp(function->name, "main") == 0);
       assert(function->impl);
@@ -4170,20 +4188,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width));
       break;
 
-   case nir_intrinsic_load_subgroup_invocation: {
-      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
-      dest = retype(dest, BRW_REGISTER_TYPE_UD);
-      const fs_builder allbld8 = bld.group(8, 0).exec_all();
-      allbld8.MOV(tmp, brw_imm_v(0x76543210));
-      if (dispatch_width > 8)
-         allbld8.ADD(byte_offset(tmp, 16), tmp, brw_imm_uw(8u));
-      if (dispatch_width > 16) {
-         const fs_builder allbld16 = bld.group(16, 0).exec_all();
-         allbld16.ADD(byte_offset(tmp, 32), tmp, brw_imm_uw(16u));
-      }
-      bld.MOV(dest, tmp);
+   case nir_intrinsic_load_subgroup_invocation:
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+              nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]);
       break;
-   }
 
    case nir_intrinsic_load_subgroup_eq_mask:
    case nir_intrinsic_load_subgroup_ge_mask:
author	Jason Ekstrand <[email protected]>	2017-08-31 21:56:43 -0700
committer	Jason Ekstrand <[email protected]>	2017-11-07 10:37:52 -0800
commit	6132992cdb858268af0e985727d80e4140be389c (patch)
tree	faf89c223c5cbb174c0bf97aea7f9ed950c9eff1 /src/intel/compiler
parent	295605c930270a5b90f847b79474507d8b0c9e9c (diff)