summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorFrancisco Jerez <[email protected]>2013-09-25 16:30:20 -0700
committerFrancisco Jerez <[email protected]>2013-11-04 12:12:37 -0800
commit764f40d92edfdfea4ea2b092fd1ba7888cc7ea7e (patch)
tree4e518b94d7ace6cd78270c84add68abfd412d8b1 /src
parent34fe051e215107dddbaae71e2edf15f88d839936 (diff)
i965/gen7: Handle atomic instructions from the FS back-end.
This can deal with all the 15 32-bit untyped atomic operations the hardware supports, but only INC and PREDEC are going to be exposed through the API for now. v2: Represent atomics as GLSL intrinsics. Add support for variably indexed atomic counter arrays. Fix interaction with fragment discard. v3: Add comment on why we don't need to assign uniform storage for atomic counters. Reviewed-by: Paul Berry <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h9
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp134
2 files changed, 141 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index e9bf52f1f11..675e379fcd6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -401,6 +401,13 @@ public:
void emit_shader_time_write(enum shader_time_shader_type type,
fs_reg value);
+ void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+ fs_reg dst, fs_reg offset, fs_reg src0,
+ fs_reg src1);
+
+ void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
+ fs_reg offset);
+
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
fs_reg dst,
fs_reg src,
@@ -420,6 +427,8 @@ public:
void dump_instruction(backend_instruction *inst);
+ void visit_atomic_counter_intrinsic(ir_call *ir);
+
struct gl_fragment_program *fp;
struct brw_wm_compile *c;
unsigned int sanity_param_count;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index dd606718b66..900d4a5902c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -107,8 +107,11 @@ fs_visitor::visit(ir_variable *ir)
/* Thanks to the lower_ubo_reference pass, we will see only
* ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
* variables, so no need for them to be in variable_ht.
+ *
+ * Atomic counters take no uniform storage, no need to do
+ * anything here.
*/
- if (ir->is_in_uniform_block())
+ if (ir->is_in_uniform_block() || ir->type->contains_atomic())
return;
if (dispatch_width == 16) {
@@ -2196,9 +2199,58 @@ fs_visitor::visit(ir_loop_jump *ir)
}
void
+fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+ ir_dereference *deref = static_cast<ir_dereference *>(
+ ir->actual_parameters.get_head());
+ ir_variable *location = deref->variable_referenced();
+ unsigned surf_index = (c->prog_data.base.binding_table.abo_start +
+ location->atomic.buffer_index);
+
+ /* Calculate the surface offset */
+ fs_reg offset(this, glsl_type::uint_type);
+ ir_dereference_array *deref_array = deref->as_dereference_array();
+
+ if (deref_array) {
+ deref_array->array_index->accept(this);
+
+ fs_reg tmp(this, glsl_type::uint_type);
+ emit(MUL(tmp, this->result, ATOMIC_COUNTER_SIZE));
+ emit(ADD(offset, tmp, location->atomic.offset));
+ } else {
+ offset = location->atomic.offset;
+ }
+
+ /* Emit the appropriate machine instruction */
+ const char *callee = ir->callee->function_name();
+ ir->return_deref->accept(this);
+ fs_reg dst = this->result;
+
+ if (!strcmp("__intrinsic_atomic_read", callee)) {
+ emit_untyped_surface_read(surf_index, dst, offset);
+
+ } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+ emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
+ fs_reg(), fs_reg());
+
+ } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+ emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
+ fs_reg(), fs_reg());
+ }
+}
+
+void
fs_visitor::visit(ir_call *ir)
{
- assert(!"FINISHME");
+ const char *callee = ir->callee->function_name();
+
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ } else {
+ assert(!"Unsupported intrinsic.");
+ }
}
void
@@ -2249,6 +2301,84 @@ fs_visitor::visit(ir_end_primitive *)
assert(!"not reached");
}
+void
+fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+ fs_reg dst, fs_reg offset, fs_reg src0,
+ fs_reg src1)
+{
+ const unsigned operand_len = dispatch_width / 8;
+ unsigned mlen = 0;
+
+ /* Initialize the sample mask in the message header. */
+ emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
+ ->force_writemask_all = true;
+
+ if (fp->UsesKill) {
+ emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1)))
+ ->force_writemask_all = true;
+ } else {
+ emit(MOV(brw_uvec_mrf(1, mlen, 7),
+ retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
+ ->force_writemask_all = true;
+ }
+
+ mlen++;
+
+ /* Set the atomic operation offset. */
+ emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset));
+ mlen += operand_len;
+
+ /* Set the atomic operation arguments. */
+ if (src0.file != BAD_FILE) {
+ emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0));
+ mlen += operand_len;
+ }
+
+ if (src1.file != BAD_FILE) {
+ emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1));
+ mlen += operand_len;
+ }
+
+ /* Emit the instruction. */
+ fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, atomic_op, surf_index);
+ inst.base_mrf = 0;
+ inst.mlen = mlen;
+ emit(inst);
+}
+
+void
+fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
+ fs_reg offset)
+{
+ const unsigned operand_len = dispatch_width / 8;
+ unsigned mlen = 0;
+
+ /* Initialize the sample mask in the message header. */
+ emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
+ ->force_writemask_all = true;
+
+ if (fp->UsesKill) {
+ emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1)))
+ ->force_writemask_all = true;
+ } else {
+ emit(MOV(brw_uvec_mrf(1, mlen, 7),
+ retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
+ ->force_writemask_all = true;
+ }
+
+ mlen++;
+
+ /* Set the surface read offset. */
+ emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), offset));
+ mlen += operand_len;
+
+ /* Emit the instruction. */
+ fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index);
+ inst.base_mrf = 0;
+ inst.mlen = mlen;
+ emit(inst);
+}
+
fs_inst *
fs_visitor::emit(fs_inst inst)
{