diff options
author | Jordan Justen <[email protected]> | 2016-03-02 01:11:29 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-03-02 12:03:05 -0800 |
commit | 98cdce1ce4737cf09c5d9613a85bb118f0f1757b (patch) | |
tree | c0173496a8feec78c3049f59a68d84ef7e771d68 | |
parent | da4745104cc02fc0052a2e05e37c69a4dce76eef (diff) |
anv/gen7: Use predicated rendering for indirect compute
For OpenGL, see commit 9a939ebb47a0d37a6b29e3dbb1b20bdc9538a721.
Fixes:
* dEQP-VK.compute.indirect_dispatch.upload_buffer.empty_command
* dEQP-VK.compute.indirect_dispatch.gen_in_compute.empty_command
Signed-off-by: Jordan Justen <[email protected]>
-rw-r--r-- | src/intel/vulkan/genX_cmd_buffer.c | 45 |
1 file changed, 45 insertions, 0 deletions
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index dbb72b44ee2..a888c360673 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -496,6 +496,9 @@ void genX(CmdDispatch)(
 #define GPGPU_DISPATCHDIMY 0x2504
 #define GPGPU_DISPATCHDIMZ 0x2508
 
+#define MI_PREDICATE_SRC0 0x2400
+#define MI_PREDICATE_SRC1 0x2408
+
 void genX(CmdDispatchIndirect)(
     VkCommandBuffer commandBuffer,
     VkBuffer _buffer,
@@ -520,8 +523,50 @@ void genX(CmdDispatchIndirect)(
    emit_lrm(batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
    emit_lrm(batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
 
+#if GEN_GEN <= 7
+   /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
+   emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0);
+   emit_lri(batch, MI_PREDICATE_SRC1 + 0, 0);
+   emit_lri(batch, MI_PREDICATE_SRC1 + 4, 0);
+
+   /* Load compute_dispatch_indirect_x_size into SRC0 */
+   emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 0);
+
+   /* predicate = (compute_dispatch_indirect_x_size == 0); */
+   anv_batch_emit(batch, GENX(MI_PREDICATE),
+                  .LoadOperation = LOAD_LOAD,
+                  .CombineOperation = COMBINE_SET,
+                  .CompareOperation = COMPARE_SRCS_EQUAL);
+
+   /* Load compute_dispatch_indirect_y_size into SRC0 */
+   emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 4);
+
+   /* predicate |= (compute_dispatch_indirect_y_size == 0); */
+   anv_batch_emit(batch, GENX(MI_PREDICATE),
+                  .LoadOperation = LOAD_LOAD,
+                  .CombineOperation = COMBINE_OR,
+                  .CompareOperation = COMPARE_SRCS_EQUAL);
+
+   /* Load compute_dispatch_indirect_z_size into SRC0 */
+   emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 8);
+
+   /* predicate |= (compute_dispatch_indirect_z_size == 0); */
+   anv_batch_emit(batch, GENX(MI_PREDICATE),
+                  .LoadOperation = LOAD_LOAD,
+                  .CombineOperation = COMBINE_OR,
+                  .CompareOperation = COMPARE_SRCS_EQUAL);
+
+   /* predicate = !predicate; */
+#define COMPARE_FALSE 1
+   anv_batch_emit(batch, GENX(MI_PREDICATE),
+                  .LoadOperation = LOAD_LOADINV,
+                  .CombineOperation = COMBINE_OR,
+                  .CompareOperation = COMPARE_FALSE);
+#endif
+
    anv_batch_emit(batch, GENX(GPGPU_WALKER),
                   .IndirectParameterEnable = true,
+                  .PredicateEnable = GEN_GEN <= 7,
                   .SIMDSize = prog_data->simd_size / 16,
                   .ThreadDepthCounterMaximum = 0,
                   .ThreadHeightCounterMaximum = 0, |