diff options
author | Francisco Jerez <[email protected]> | 2013-09-25 16:31:35 -0700 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2013-11-04 12:12:38 -0800 |
commit | ba885c30c74f9efc94743d4582d30a0e70924b97 (patch) | |
tree | c1fbbdb4f55d1de6d5e24dc74232382f5b635aca /src/mesa/drivers/dri/i965 | |
parent | 764f40d92edfdfea4ea2b092fd1ba7888cc7ea7e (diff) |
i965/gen7: Handle atomic instructions from the VEC4 back-end.
This can deal with all 15 of the 32-bit untyped atomic operations the
hardware supports, but only INC and PREDEC are going to be exposed
through the API for now.
v2: Represent atomics as GLSL intrinsics. Add support for variably
indexed atomic counter arrays.
v3: Add comment on why we don't need to assign uniform storage for
atomic counters.
Reviewed-by: Paul Berry <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 9 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 103 |
2 files changed, 110 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index a479646c69b..1f29e576caf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -489,6 +489,13 @@ public: void emit_shader_time_write(enum shader_time_shader_type type, src_reg value); + void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, + dst_reg dst, src_reg offset, src_reg src0, + src_reg src1); + + void emit_untyped_surface_read(unsigned surf_index, dst_reg dst, + src_reg offset); + src_reg get_scratch_offset(vec4_instruction *inst, src_reg *reladdr, int reg_offset); src_reg get_pull_constant_offset(vec4_instruction *inst, @@ -514,6 +521,8 @@ public: void dump_instruction(backend_instruction *inst); + void visit_atomic_counter_intrinsic(ir_call *ir); + protected: void emit_vertex(); void lower_attributes_to_hw_regs(const int *attribute_map, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ff6d69ceaea..a036e2dbb06 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -974,8 +974,11 @@ vec4_visitor::visit(ir_variable *ir) /* Thanks to the lower_ubo_reference pass, we will see only * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO * variables, so no need for them to be in variable_ht. + * + * Atomic counters take no uniform storage, no need to do + * anything here. 
*/ - if (ir->is_in_uniform_block()) + if (ir->is_in_uniform_block() || ir->type->contains_atomic()) return; /* Track how big the whole uniform variable is, in case we need to put a @@ -2161,9 +2164,56 @@ vec4_visitor::visit(ir_constant *ir) } void +vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir) +{ + ir_dereference *deref = static_cast<ir_dereference *>( + ir->actual_parameters.get_head()); + ir_variable *location = deref->variable_referenced(); + unsigned surf_index = (prog_data->base.binding_table.abo_start + + location->atomic.buffer_index); + + /* Calculate the surface offset */ + src_reg offset(this, glsl_type::uint_type); + ir_dereference_array *deref_array = deref->as_dereference_array(); + if (deref_array) { + deref_array->array_index->accept(this); + + src_reg tmp(this, glsl_type::uint_type); + emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE)); + emit(ADD(dst_reg(offset), tmp, location->atomic.offset)); + } else { + offset = location->atomic.offset; + } + + /* Emit the appropriate machine instruction */ + const char *callee = ir->callee->function_name(); + dst_reg dst = get_assignment_lhs(ir->return_deref, this); + + if (!strcmp("__intrinsic_atomic_read", callee)) { + emit_untyped_surface_read(surf_index, dst, offset); + + } else if (!strcmp("__intrinsic_atomic_increment", callee)) { + emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, + src_reg(), src_reg()); + + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { + emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, + src_reg(), src_reg()); + } +} + +void vec4_visitor::visit(ir_call *ir) { - assert(!"not reached"); + const char *callee = ir->callee->function_name(); + + if (!strcmp("__intrinsic_atomic_read", callee) || + !strcmp("__intrinsic_atomic_increment", callee) || + !strcmp("__intrinsic_atomic_predecrement", callee)) { + visit_atomic_counter_intrinsic(ir); + } else { + assert(!"Unsupported intrinsic."); + } } void @@ -2558,6 +2608,55 @@ 
vec4_visitor::visit(ir_end_primitive *) } void +vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, + dst_reg dst, src_reg offset, + src_reg src0, src_reg src1) +{ + unsigned mlen = 0; + + /* Set the atomic operation offset. */ + emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset)); + mlen++; + + /* Set the atomic operation arguments. */ + if (src0.file != BAD_FILE) { + emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0)); + mlen++; + } + + if (src1.file != BAD_FILE) { + emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1)); + mlen++; + } + + /* Emit the instruction. Note that this maps to the normal SIMD8 + * untyped atomic message on Ivy Bridge, but that's OK because + * unused channels will be masked out. + */ + vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, + src_reg(atomic_op), src_reg(surf_index)); + inst->base_mrf = 0; + inst->mlen = mlen; +} + +void +vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst, + src_reg offset) +{ + /* Set the surface read offset. */ + emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset)); + + /* Emit the instruction. Note that this maps to the normal SIMD8 + * untyped surface read message, but that's OK because unused + * channels will be masked out. + */ + vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, + dst, src_reg(surf_index)); + inst->base_mrf = 0; + inst->mlen = 1; +} + +void vec4_visitor::emit_ndc_computation() { /* Get the position */ |