diff options
author | Jordan Justen <[email protected]> | 2015-07-28 15:25:46 -0700 |
---|---|---|
committer | Jordan Justen <[email protected]> | 2015-12-09 23:50:38 -0800 |
commit | faddb301ff72bd7ac8d4274e0d895ca37a4d3bce (patch) | |
tree | bbe6b498aeb802656bd8b7072e70d58b5eab5e60 | |
parent | 8613206bd3dd80dc916b6ce7c47bf59cd4d114c8 (diff) |
i965/fs: Handle nir shared variable store intrinsic
v4:
* Apply a similar optimization for shared variable stores as in commit
0cb7d7b4b7c32246d4c4225a1d17d7ff79a7526d. This was causing an
OpenGLES 3.1 CTS failure, but
867c436ca841b4196b4dde4786f5086c76b20dd7 fixes that.
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Iago Toral Quiroga <[email protected]>
Reviewed-by: Kristian Høgsberg <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c9da49564a7..4b7ea1756e9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2455,6 +2455,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_store_shared_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_shared: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+      /* Value */
+      fs_reg val_reg = get_nir_src(instr->src[0]);
+
+      /* Writemask */
+      unsigned writemask = instr->const_index[1];
+
+      /* Combine groups of consecutive enabled channels in one write
+       * message. We use ffs to find the first enabled channel and then ffs on
+       * the bit-inverse, down-shifted writemask to determine the length of
+       * the block of enabled bits.
+       */
+      while (writemask) {
+         unsigned first_component = ffs(writemask) - 1;
+         unsigned length = ffs(~(writemask >> first_component)) - 1;
+         fs_reg offset_reg;
+
+         if (!has_indirect) {
+            offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
+         } else {
+            offset_reg = vgrf(glsl_type::uint_type);
+            bld.ADD(offset_reg,
+                    retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
+                    brw_imm_ud(4 * first_component));
+         }
+
+         emit_untyped_write(bld, surf_index, offset_reg,
+                            offset(val_reg, bld, first_component),
+                            1 /* dims */, length,
+                            BRW_PREDICATE_NONE);
+
+         /* Clear the bits in the writemask that we just wrote, then try
+          * again to see if more channels are left.
+          */
+         writemask &= (15 << (first_component + length));
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_input_indirect:
       unreachable("Not allowed");
       /* fallthrough */