summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2019-12-12 20:17:41 -0500
committerMarge Bot <[email protected]>2019-12-16 20:06:07 +0000
commit4846aeaf572db40e612c042205d45174aa6e2d5f (patch)
tree44ccb1e635de206cf567e6b1051e36750d8030e4 /src/gallium
parentde4a4595f62a9bd1494781c203ea8b1049545605 (diff)
radeonsi/gfx10: don't insert NGG streamout atomics if they are never used
Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3095>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeonsi/gfx10_shader_ngg.c65
1 files changed, 34 insertions, 31 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 5d77a3b6252..6239b1ba466 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -675,7 +675,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
/* Update query buffer */
/* TODO: this won't catch 96-bit clear_buffer via transform feedback. */
- if (!info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
+ if (ctx->screen->use_ngg_streamout &&
+ !info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */
@@ -1102,38 +1103,40 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
}
/* Write shader query data. */
- tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
- tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
- ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
- unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
- LLVMConstInt(ctx->i32, num_query_comps, false), "");
- ac_build_ifcc(&ctx->ac, tmp, 5110);
- {
- LLVMValueRef offset;
- tmp = tid;
- if (sel->so.num_outputs)
- tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->i32, 3, false), "");
- offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 32, false), "");
- if (sel->so.num_outputs) {
- tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->i32, 2, false), "");
- tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 8, false), "");
- offset = LLVMBuildAdd(builder, offset, tmp, "");
- }
+ if (ctx->screen->use_ngg_streamout) {
+ tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
+ unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
+ LLVMConstInt(ctx->i32, num_query_comps, false), "");
+ ac_build_ifcc(&ctx->ac, tmp, 5110);
+ {
+ LLVMValueRef offset;
+ tmp = tid;
+ if (sel->so.num_outputs)
+ tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->i32, 3, false), "");
+ offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 32, false), "");
+ if (sel->so.num_outputs) {
+ tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->i32, 2, false), "");
+ tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 8, false), "");
+ offset = LLVMBuildAdd(builder, offset, tmp, "");
+ }
- tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
- LLVMValueRef args[] = {
- tmp,
- ngg_get_query_buf(ctx),
- offset,
- LLVMConstInt(ctx->i32, 16, false), /* soffset */
- ctx->i32_0, /* cachepolicy */
- };
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
- ctx->i32, args, 5, 0);
+ tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
+ LLVMValueRef args[] = {
+ tmp,
+ ngg_get_query_buf(ctx),
+ offset,
+ LLVMConstInt(ctx->i32, 16, false), /* soffset */
+ ctx->i32_0, /* cachepolicy */
+ };
+ ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
+ ctx->i32, args, 5, 0);
+ }
+ ac_build_endif(&ctx->ac, 5110);
+ ac_build_endif(&ctx->ac, 5109);
}
- ac_build_endif(&ctx->ac, 5110);
- ac_build_endif(&ctx->ac, 5109);
/* TODO: culling */