aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJordan Justen <[email protected]>2015-07-28 15:25:46 -0700
committerJordan Justen <[email protected]>2015-12-09 23:50:38 -0800
commitfaddb301ff72bd7ac8d4274e0d895ca37a4d3bce (patch)
treebbe6b498aeb802656bd8b7072e70d58b5eab5e60
parent8613206bd3dd80dc916b6ce7c47bf59cd4d114c8 (diff)
i965/fs: Handle nir shared variable store intrinsic
v4: * Apply similar optimization for shared variable stores as 0cb7d7b4b7c32246d4c4225a1d17d7ff79a7526d. This was causing a OpenGLES 3.1 CTS failure, but 867c436ca841b4196b4dde4786f5086c76b20dd7 fixes that. Signed-off-by: Jordan Justen <[email protected]> Reviewed-by: Iago Toral Quiroga <[email protected]> Reviewed-by: Kristian Høgsberg <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp48
1 files changed, 48 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c9da49564a7..4b7ea1756e9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2455,6 +2455,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_store_shared_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_store_shared: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index */
+ fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+ /* Value */
+ fs_reg val_reg = get_nir_src(instr->src[0]);
+
+ /* Writemask */
+ unsigned writemask = instr->const_index[1];
+
+ /* Combine groups of consecutive enabled channels in one write
+ * message. We use ffs to find the first enabled channel and then ffs on
+ * the bit-inverse, down-shifted writemask to determine the length of
+ * the block of enabled bits.
+ */
+ while (writemask) {
+ unsigned first_component = ffs(writemask) - 1;
+ unsigned length = ffs(~(writemask >> first_component)) - 1;
+ fs_reg offset_reg;
+
+ if (!has_indirect) {
+ offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(4 * first_component));
+ }
+
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, first_component),
+ 1 /* dims */, length,
+ BRW_PREDICATE_NONE);
+
+ /* Clear the bits in the writemask that we just wrote, then try
+ * again to see if more channels are left.
+ */
+ writemask &= (15 << (first_component + length));
+ }
+
+ break;
+ }
+
case nir_intrinsic_load_input_indirect:
unreachable("Not allowed");
/* fallthrough */