author     Francisco Jerez <[email protected]>  2013-09-25 16:31:35 -0700
committer  Francisco Jerez <[email protected]>  2013-11-04 12:12:38 -0800
commit     ba885c30c74f9efc94743d4582d30a0e70924b97 (patch)
tree       c1fbbdb4f55d1de6d5e24dc74232382f5b635aca /src
parent     764f40d92edfdfea4ea2b092fd1ba7888cc7ea7e (diff)
i965/gen7: Handle atomic instructions from the VEC4 back-end.
This can deal with all 15 of the 32-bit untyped atomic operations the
hardware supports, but only INC and PREDEC are going to be exposed
through the API for now.

v2: Represent atomics as GLSL intrinsics. Add support for variably
    indexed atomic counter arrays.

v3: Add comment on why we don't need to assign uniform storage for
    atomic counters.

Reviewed-by: Paul Berry <[email protected]>
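A note on the variably indexed counter arrays mentioned in v2: the new
visit_atomic_counter_intrinsic() in the diff below derives the surface offset
with a MUL/ADD pair. Here is a minimal standalone sketch of that byte-offset
arithmetic, assuming the usual 4-byte size of a 32-bit counter (standing in
for Mesa's ATOMIC_COUNTER_SIZE); it is illustrative only, not driver code.

#include <cstdint>

/* Bytes per 32-bit atomic counter; stands in for Mesa's ATOMIC_COUNTER_SIZE. */
static const uint32_t COUNTER_SIZE = 4;

/* Byte offset of counter[array_index] within its buffer: the counter's
 * declared offset plus the index scaled by the counter size, which is
 * what the MUL/ADD pair in the diff computes at run time.
 */
static uint32_t
atomic_counter_byte_offset(uint32_t declared_offset, uint32_t array_index)
{
   return declared_offset + array_index * COUNTER_SIZE;
}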
Diffstat (limited to 'src')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.h             9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   103
2 files changed, 110 insertions, 2 deletions
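Before the diff itself, a quick summary of how the three atomic counter
intrinsics are dispatched by the new visit(ir_call *) implementation. This is
an illustrative sketch, not part of the patch; the intrinsic names and
BRW_AOP_* labels are taken from the diff, while the table type and lookup
helper are hypothetical.

#include <cstring>

/* Hypothetical summary table: what the back-end emits for each intrinsic. */
struct atomic_intrinsic {
   const char *name;     /* GLSL IR intrinsic name */
   const char *action;   /* corresponding hardware message */
};

static const atomic_intrinsic atomic_intrinsics[] = {
   { "__intrinsic_atomic_read",         "untyped surface read" },
   { "__intrinsic_atomic_increment",    "untyped atomic, BRW_AOP_INC" },
   { "__intrinsic_atomic_predecrement", "untyped atomic, BRW_AOP_PREDEC" },
};

/* Mirrors the strcmp() chain in the new vec4_visitor::visit(ir_call *). */
static const char *
lookup_action(const char *callee)
{
   for (const atomic_intrinsic &i : atomic_intrinsics) {
      if (!strcmp(i.name, callee))
         return i.action;
   }
   return nullptr;   /* anything else hits the "Unsupported intrinsic." assert */
}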
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index a479646c69b..1f29e576caf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -489,6 +489,13 @@ public:
    void emit_shader_time_write(enum shader_time_shader_type type,
                                src_reg value);
 
+   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+                            dst_reg dst, src_reg offset, src_reg src0,
+                            src_reg src1);
+
+   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+                                  src_reg offset);
+
    src_reg get_scratch_offset(vec4_instruction *inst,
                               src_reg *reladdr, int reg_offset);
    src_reg get_pull_constant_offset(vec4_instruction *inst,
@@ -514,6 +521,8 @@ public:
    void dump_instruction(backend_instruction *inst);
 
+   void visit_atomic_counter_intrinsic(ir_call *ir);
+
 protected:
    void emit_vertex();
    void lower_attributes_to_hw_regs(const int *attribute_map,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ff6d69ceaea..a036e2dbb06 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -974,8 +974,11 @@ vec4_visitor::visit(ir_variable *ir)
       /* Thanks to the lower_ubo_reference pass, we will see only
        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
        * variables, so no need for them to be in variable_ht.
+       *
+       * Atomic counters take no uniform storage, no need to do
+       * anything here.
        */
-      if (ir->is_in_uniform_block())
+      if (ir->is_in_uniform_block() || ir->type->contains_atomic())
         return;
 
       /* Track how big the whole uniform variable is, in case we need to put a
@@ -2161,9 +2164,56 @@ vec4_visitor::visit(ir_constant *ir)
 }
 
 void
+vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+   ir_dereference *deref = static_cast<ir_dereference *>(
+      ir->actual_parameters.get_head());
+   ir_variable *location = deref->variable_referenced();
+   unsigned surf_index = (prog_data->base.binding_table.abo_start +
+                          location->atomic.buffer_index);
+
+   /* Calculate the surface offset */
+   src_reg offset(this, glsl_type::uint_type);
+   ir_dereference_array *deref_array = deref->as_dereference_array();
+   if (deref_array) {
+      deref_array->array_index->accept(this);
+
+      src_reg tmp(this, glsl_type::uint_type);
+      emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
+      emit(ADD(dst_reg(offset), tmp, location->atomic.offset));
+   } else {
+      offset = location->atomic.offset;
+   }
+
+   /* Emit the appropriate machine instruction */
+   const char *callee = ir->callee->function_name();
+   dst_reg dst = get_assignment_lhs(ir->return_deref, this);
+
+   if (!strcmp("__intrinsic_atomic_read", callee)) {
+      emit_untyped_surface_read(surf_index, dst, offset);
+
+   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+      emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
+                          src_reg(), src_reg());
+
+   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+      emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
+                          src_reg(), src_reg());
+   }
+}
+
+void
 vec4_visitor::visit(ir_call *ir)
 {
-   assert(!"not reached");
+   const char *callee = ir->callee->function_name();
+
+   if (!strcmp("__intrinsic_atomic_read", callee) ||
+       !strcmp("__intrinsic_atomic_increment", callee) ||
+       !strcmp("__intrinsic_atomic_predecrement", callee)) {
+      visit_atomic_counter_intrinsic(ir);
+   } else {
+      assert(!"Unsupported intrinsic.");
+   }
 }
 
 void
@@ -2558,6 +2608,55 @@ vec4_visitor::visit(ir_end_primitive *)
 }
 
 void
+vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+                                  dst_reg dst, src_reg offset,
+                                  src_reg src0, src_reg src1)
+{
+   unsigned mlen = 0;
+
+   /* Set the atomic operation offset. */
+   emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
+   mlen++;
+
+   /* Set the atomic operation arguments. */
+   if (src0.file != BAD_FILE) {
+      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
+      mlen++;
+   }
+
+   if (src1.file != BAD_FILE) {
+      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
+      mlen++;
+   }
+
+   /* Emit the instruction. Note that this maps to the normal SIMD8
+    * untyped atomic message on Ivy Bridge, but that's OK because
+    * unused channels will be masked out.
+    */
+   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
+                                 src_reg(atomic_op), src_reg(surf_index));
+   inst->base_mrf = 0;
+   inst->mlen = mlen;
+}
+
+void
+vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+                                        src_reg offset)
+{
+   /* Set the surface read offset. */
+   emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+
+   /* Emit the instruction. Note that this maps to the normal SIMD8
+    * untyped surface read message, but that's OK because unused
+    * channels will be masked out.
+    */
+   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ,
+                                 dst, src_reg(surf_index));
+   inst->base_mrf = 0;
+   inst->mlen = 1;
+}
+
+void
 vec4_visitor::emit_ndc_computation()
 {
    /* Get the position */
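A closing note on the payload built by the emit_untyped_atomic() addition
above: the offset always occupies the first MRF, and each present source
operand adds one more register, so INC and PREDEC (which pass no sources)
end up with a single-register message. A minimal sketch of that length
computation follows; the helper name is made up for illustration.

/* Illustrative only: message length in MRFs for the untyped atomic send,
 * mirroring how emit_untyped_atomic() above increments mlen.
 */
static unsigned
untyped_atomic_mlen(bool has_src0, bool has_src1)
{
   unsigned mlen = 1;   /* the offset always goes in the first MRF */
   if (has_src0)
      mlen++;           /* first data operand, for operations that take one */
   if (has_src1)
      mlen++;           /* second data operand, e.g. for a compare-exchange */
   return mlen;
}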