about | summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-12-16 19:09:21 -0500
committer: Marek Olšák <[email protected]> 2019-12-27 13:50:57 -0500
commit: aa3df12fc2fbe2963eb09cbd2a126b82d208f0ca (patch)
tree: 15f18eb071fd6e1eb2e3c6f30dc7b82b61c7edf5 /src/gallium/drivers/radeonsi
parent: 17164d4e27fbccae83ae42d16f29884cba325ebc (diff)
radeonsi/gfx10: enable NGG passthrough for eligible shaders
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c 65
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 10
3 files changed, 47 insertions, 32 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 419a4021943..5aac4ceac25 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -98,6 +98,7 @@ struct ngg_prim {
LLVMValueRef isnull;
LLVMValueRef index[3];
LLVMValueRef edgeflag[3];
+ LLVMValueRef passthrough;
};
static void build_export_prim(struct si_shader_context *ctx,
@@ -107,17 +108,21 @@ static void build_export_prim(struct si_shader_context *ctx,
struct ac_export_args args;
LLVMValueRef tmp;
- tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
- args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
-
- for (unsigned i = 0; i < prim->num_vertices; ++i) {
- tmp = LLVMBuildShl(builder, prim->index[i],
- LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
- args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
- tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
- tmp = LLVMBuildShl(builder, tmp,
- LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
- args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+ if (prim->passthrough) {
+ args.out[0] = prim->passthrough;
+ } else {
+ tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
+ args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
+
+ for (unsigned i = 0; i < prim->num_vertices; ++i) {
+ tmp = LLVMBuildShl(builder, prim->index[i],
+ LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
+ args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+ tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
+ tmp = LLVMBuildShl(builder, tmp,
+ LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
+ args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+ }
}
args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
@@ -729,25 +734,29 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
{
struct ngg_prim prim = {};
- prim.num_vertices = num_vertices;
- prim.isnull = ctx->ac.i1false;
- memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+ if (gfx10_is_ngg_passthrough(ctx->shader)) {
+ prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
+ } else {
+ prim.num_vertices = num_vertices;
+ prim.isnull = ctx->ac.i1false;
+ memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+
+ for (unsigned i = 0; i < num_vertices; ++i) {
+ if (ctx->type != PIPE_SHADER_VERTEX) {
+ prim.edgeflag[i] = ctx->i1false;
+ continue;
+ }
- for (unsigned i = 0; i < num_vertices; ++i) {
- if (ctx->type != PIPE_SHADER_VERTEX) {
- prim.edgeflag[i] = ctx->i1false;
- continue;
- }
+ tmp = LLVMBuildLShr(builder,
+ ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
+ LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
+ prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- tmp = LLVMBuildLShr(builder,
- ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
- LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
- prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
-
- if (sel->info.writes_edgeflag) {
- tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
- prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
- tmp2, "");
+ if (sel->info.writes_edgeflag) {
+ tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
+ prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
+ tmp2, "");
+ }
}
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index bd0c777c148..ee9a75c9739 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -790,7 +790,7 @@ union si_vgt_param_key {
uint32_t index;
};
-#define SI_NUM_VGT_STAGES_KEY_BITS 4
+#define SI_NUM_VGT_STAGES_KEY_BITS 5
#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
@@ -801,6 +801,7 @@ union si_vgt_stages_key {
#if UTIL_ARCH_LITTLE_ENDIAN
unsigned tess:1;
unsigned gs:1;
+ unsigned ngg_passthrough:1;
unsigned ngg:1; /* gfx10+ */
unsigned streamout:1; /* only used with NGG */
unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
@@ -808,6 +809,7 @@ union si_vgt_stages_key {
unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
unsigned streamout:1;
unsigned ngg:1;
+ unsigned ngg_passthrough:1;
unsigned gs:1;
unsigned tess:1;
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a81636801cc..6fa9c37bf86 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -3872,9 +3872,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
}
if (key.u.ngg) {
- stages |= S_028B54_PRIMGEN_EN(1);
- if (key.u.streamout)
- stages |= S_028B54_NGG_WAVE_ID_EN(1);
+ stages |= S_028B54_PRIMGEN_EN(1) |
+ S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
+ S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
} else if (key.u.gs)
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
@@ -4027,6 +4027,10 @@ bool si_update_shaders(struct si_context *sctx)
}
}
+ /* This must be done after the shader variant is selected. */
+ if (sctx->ngg)
+ key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current);
+
si_update_vgt_shader_config(sctx, key);
if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)