diff options
author | Jason Ekstrand <[email protected]> | 2017-08-31 21:56:43 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-11-07 10:37:52 -0800 |
commit | 6132992cdb858268af0e985727d80e4140be389c (patch) | |
tree | faf89c223c5cbb174c0bf97aea7f9ed950c9eff1 /src/intel/compiler/brw_fs_nir.cpp | |
parent | 295605c930270a5b90f847b79474507d8b0c9e9c (diff) |
intel/compiler/fs: Set up subgroup invocation as a system value
Subgroup invocation is computed using a vector immediate and some
dispatch-aware arithmetic. Unfortunately, due to the vector arithmetic,
and the fact that it's frequently read 16-wide, it's not something that
can easily be CSEd by the back-end compiler. There are a few different
possible approaches to this problem:
1) Emit the code to calculate the subgroup invocation on-the-fly and
trust NIR to do the CSE. This is what we were doing.
2) Add a back-end instruction for the subgroup invocation. This has the
advantage of helping the back-end compiler with CSE but has the
downside of very poor scheduling for the calculation because it has
to be emitted in the back-end.
3) Emit the calculation at the top of the program and re-use the
result. This gets rid of the CSE problem but comes at the cost of
an extra live register.
This commit switches us from 1) to 3). We choose to store the subgroup
invocation values as a W type to reduce the impact of the extra live
register. Trusting NIR and using 1) was fine but we're soon going to
want to use the subgroup invocation value for other things in the
back-end compiler and this makes it much easier to do without having to
worry about CSE problems.
Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs_nir.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 34 |
1 files changed, 21 insertions, 13 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 39e7e692874..35fae180285 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -231,6 +231,24 @@ fs_visitor::nir_emit_system_values() nir_system_values[i] = fs_reg(); } + /* Always emit SUBGROUP_INVOCATION. Dead code will clean it up if we + * never end up using it. + */ + { + const fs_builder abld = bld.annotate("gl_SubgroupInvocation", NULL); + fs_reg &reg = nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]; + reg = abld.vgrf(BRW_REGISTER_TYPE_W); + + const fs_builder allbld8 = abld.group(8, 0).exec_all(); + allbld8.MOV(reg, brw_imm_v(0x76543210)); + if (dispatch_width > 8) + allbld8.ADD(byte_offset(reg, 16), reg, brw_imm_uw(8u)); + if (dispatch_width > 16) { + const fs_builder allbld16 = abld.group(16, 0).exec_all(); + allbld16.ADD(byte_offset(reg, 32), reg, brw_imm_uw(16u)); + } + } + nir_foreach_function(function, nir) { assert(strcmp(function->name, "main") == 0); assert(function->impl); @@ -4170,20 +4188,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width)); break; - case nir_intrinsic_load_subgroup_invocation: { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW); - dest = retype(dest, BRW_REGISTER_TYPE_UD); - const fs_builder allbld8 = bld.group(8, 0).exec_all(); - allbld8.MOV(tmp, brw_imm_v(0x76543210)); - if (dispatch_width > 8) - allbld8.ADD(byte_offset(tmp, 16), tmp, brw_imm_uw(8u)); - if (dispatch_width > 16) { - const fs_builder allbld16 = bld.group(16, 0).exec_all(); - allbld16.ADD(byte_offset(tmp, 32), tmp, brw_imm_uw(16u)); - } - bld.MOV(dest, tmp); + case nir_intrinsic_load_subgroup_invocation: + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), + nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]); break; - } case nir_intrinsic_load_subgroup_eq_mask: case nir_intrinsic_load_subgroup_ge_mask: |