diff options
-rw-r--r-- | src/amd/llvm/ac_llvm_build.c | 20 | ||||
-rw-r--r-- | src/amd/llvm/ac_llvm_build.h | 3 | ||||
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 28 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 28 |
4 files changed, 31 insertions, 48 deletions
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index a0e9eecd823..3df941b1f59 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -4728,6 +4728,26 @@ ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, args->enabled_channels = mask; } +/* Send GS Alloc Req message from the first wave of the group to SPI. + * Message payload is: + * - bits 0..10: vertices in group + * - bits 12..22: primitives in group + */ +void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id, + LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tmp; + + ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020); + + tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false),""); + tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); + ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp); + + ac_build_endif(ctx, 5020); +} + static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx) { diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 8f6d56ab687..9e216a80956 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -747,6 +747,9 @@ ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, LLVMValueRef samplemask, struct ac_export_args *args); +void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id, + LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt); + static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg) { diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 98f3f4903b4..3aeba178316 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2371,28 +2371,6 @@ ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread, return ngg_gs_vertex_ptr(ctx, vertexidx); } -/* Send GS Alloc Req message from the first wave of the group to SPI. - * Message payload is: - * - bits 0..10: vertices in group - * - bits 12..22: primitives in group - */ -static void build_sendmsg_gs_alloc_req(struct radv_shader_context *ctx, - LLVMValueRef vtx_cnt, - LLVMValueRef prim_cnt) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef tmp; - - tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, ""); - ac_build_ifcc(&ctx->ac, tmp, 5020); - - tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->ac.i32, 12, false),""); - tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, tmp); - - ac_build_endif(&ctx->ac, 5020); -} - struct ngg_prim { unsigned num_vertices; LLVMValueRef isnull; @@ -3020,7 +2998,8 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) /* TODO: primitive culling */ - build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); + ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), + ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); /* TODO: streamout queries */ /* Export primitive data to the index buffer. Format is: @@ -3317,7 +3296,8 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) * there are 4 or more contiguous null primitives in the export * (in the common case of single-dword prim exports). */ - build_sendmsg_gs_alloc_req(ctx, vertlive_scan.result_reduce, num_emit_threads); + ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), + vertlive_scan.result_reduce, num_emit_threads); /* Setup the reverse vertex compaction permutation. We re-use stream 1 * of the primitive liveness flags, relying on the fact that each diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 5aac4ceac25..d403383b09b 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -71,28 +71,6 @@ static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx) LLVMConstInt(ctx->i32, GFX10_GS_QUERY_BUF, false)); } -/* Send GS Alloc Req message from the first wave of the group to SPI. - * Message payload is: - * - bits 0..10: vertices in group - * - bits 12..22: primitives in group - */ -static void build_sendmsg_gs_alloc_req(struct si_shader_context *ctx, - LLVMValueRef vtx_cnt, - LLVMValueRef prim_cnt) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef tmp; - - tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, ""); - ac_build_ifcc(&ctx->ac, tmp, 5020); - - tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->ac.i32, 12, false),""); - tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, tmp); - - ac_build_endif(&ctx->ac, 5020); -} - struct ngg_prim { unsigned num_vertices; LLVMValueRef isnull; @@ -676,7 +654,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, ac_build_endif(&ctx->ac, 5400); } - build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); + ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), + ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); /* Update query buffer */ /* TODO: this won't catch 96-bit clear_buffer via transform feedback. */ @@ -1213,7 +1192,8 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) * there are 4 or more contiguous null primitives in the export * (in the common case of single-dword prim exports). */ - build_sendmsg_gs_alloc_req(ctx, vertlive_scan.result_reduce, num_emit_threads); + ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), + vertlive_scan.result_reduce, num_emit_threads); /* Setup the reverse vertex compaction permutation. We re-use stream 1 * of the primitive liveness flags, relying on the fact that each |