diff options
author | Plamena Manolova <[email protected]> | 2018-04-27 15:06:56 +0100 |
---|---|---|
committer | Plamena Manolova <[email protected]> | 2018-06-01 16:36:39 +0100 |
commit | 939312702e35928770e5f90f7b053ece3d10e7ae (patch) | |
tree | f82406635ed7400b46173053f6252d3b86926a5b | |
parent | 60e843c4d5a5688196d13611a357cdc5b1b1141d (diff) |
i965: Add ARB_fragment_shader_interlock support.
Adds support for ARB_fragment_shader_interlock. We achieve
the interlock and fragment ordering by issuing a memory fence
via sendc.
Signed-off-by: Plamena Manolova <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
-rw-r--r-- | docs/features.txt | 2 | ||||
-rw-r--r-- | docs/relnotes/18.2.0.html | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu.h | 3 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_defines.h | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 7 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 7 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 15 | ||||
-rw-r--r-- | src/intel/compiler/brw_shader.cpp | 4 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_generator.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_extensions.c | 1 |
10 files changed, 37 insertions, 8 deletions
diff --git a/docs/features.txt b/docs/features.txt index e786bbecf44..ed4050cf98a 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_ARB_cl_event not started GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi) GL_ARB_ES3_2_compatibility DONE (i965/gen8+) - GL_ARB_fragment_shader_interlock not started + GL_ARB_fragment_shader_interlock DONE (i965) GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) GL_ARB_parallel_shader_compile not started, but Chia-I Wu did some related work in 2014 GL_ARB_post_depth_coverage DONE (i965, nvc0) diff --git a/docs/relnotes/18.2.0.html b/docs/relnotes/18.2.0.html index 00b253c076f..a3f44a29dc8 100644 --- a/docs/relnotes/18.2.0.html +++ b/docs/relnotes/18.2.0.html @@ -44,7 +44,7 @@ Note: some of the new features are only available with certain drivers. </p> <ul> -<li>TBD</li> +<li>GL_ARB_fragment_shader_interlock on i965</li> </ul> <h2>Bug fixes</h2> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 2655cdb0c5c..84d5a6f86be 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst); + struct brw_reg dst, + enum opcode send_op); void brw_pixel_interpolator_query(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 36519af63f2..ee306a6c2ce 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -480,6 +480,8 @@ enum opcode { SHADER_OPCODE_GET_BUFFER_SIZE, + SHADER_OPCODE_INTERLOCK, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 6d81c636f27..c442f8cc6f4 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ 
b/src/intel/compiler/brw_eu_emit.c @@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst) + struct brw_reg dst, + enum opcode send_op) { const struct gen_device_info *devinfo = p->devinfo; const bool commit_enable = @@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p, /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. */ - insn = next_insn(p, BRW_OPCODE_SEND); + insn = next_insn(p, send_op); dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, dst); @@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p, * flush it too. Use a different register so both flushes can be * pipelined by the hardware. */ - insn = next_insn(p, BRW_OPCODE_SEND); + insn = next_insn(p, send_op); brw_set_dest(p, insn, offset(dst, 1)); brw_set_src0(p, insn, offset(dst, 1)); brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index f49ab442fb2..de84f468d9b 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst); + brw_memory_fence(p, dst, BRW_OPCODE_SEND); + break; + + case SHADER_OPCODE_INTERLOCK: + /* The interlock is basically a memory fence issued via sendc */ + brw_memory_fence(p, dst, BRW_OPCODE_SENDC); break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: { diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index e287f11e470..166da0aa6d7 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4823,6 +4823,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case 
nir_intrinsic_begin_invocation_interlock: { + const fs_builder ubld = bld.group(8, 0); + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); + + ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 * + REG_SIZE; + + break; + } + + case nir_intrinsic_end_invocation_interlock: { + /* We don't need to do anything here */ + break; + } + default: unreachable("unknown intrinsic"); } diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 6e81db9c298..b7fb06ddbd9 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -296,6 +296,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "typed_surface_write_logical"; case SHADER_OPCODE_MEMORY_FENCE: return "memory_fence"; + case SHADER_OPCODE_INTERLOCK: + /* For an interlock we actually issue a memory fence via sendc. */ + return "interlock"; case SHADER_OPCODE_BYTE_SCATTERED_READ: return "byte_scattered_read"; @@ -1003,6 +1006,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_MEMORY_FENCE: + case SHADER_OPCODE_INTERLOCK: case SHADER_OPCODE_URB_WRITE_SIMD8: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 3d17ff97971..7519ccc9df3 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1904,7 +1904,7 @@ generate_code(struct brw_codegen *p, break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst); + brw_memory_fence(p, dst, BRW_OPCODE_SEND); break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: { diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 34597c5e05a..5a9369d7b43 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ 
b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -245,6 +245,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_shader_samples_identical = true; ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_texture_buffer = true; + ctx->Extensions.ARB_fragment_shader_interlock = true; if (can_do_pipelined_register_writes(brw->screen)) { ctx->Extensions.ARB_draw_indirect = true; |