about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-06-05 20:20:47 -0400
committer: Marek Olšák <[email protected]> 2019-07-03 15:51:13 -0400
commit: b680f723f8c2edd3382631d3481bfcb58d2952a5 (patch)
tree: debeaa20c599e6b8bad112cc848a242da7731d11
parent: 3203a74dcb32ba291a5b17d85fb89b578a903e5d (diff)
radeonsi/gfx10: export correct PrimitiveID from NGG vertex shaders
Acked-by: Bas Nieuwenhuizen <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c 43
-rw-r--r-- src/gallium/drivers/radeonsi/si_gfx_cs.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 18
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 14
8 files changed, 71 insertions, 11 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 8461a39488e..8fbce10012f 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -616,6 +616,30 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
emitted_prims = nggso.emit[0];
}
+ /* Copy Primitive IDs from GS threads to the LDS address corresponding
+ * to the ES thread of the provoking vertex.
+ */
+ if (ctx->type == PIPE_SHADER_VERTEX &&
+ ctx->shader->key.mono.u.vs_export_prim_id) {
+ /* Streamout uses LDS. We need to wait for it before we can reuse it. */
+ if (sel->so.num_outputs)
+ ac_build_s_barrier(&ctx->ac);
+
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
+ /* Extract the PROVOKING_VTX_INDEX field. */
+ LLVMValueRef provoking_vtx_in_prim =
+ si_unpack_param(ctx, ctx->param_vs_state_bits, 4, 2);
+
+ /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
+ LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
+ LLVMValueRef provoking_vtx_index =
+ LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
+
+ LLVMBuildStore(builder, ctx->abi.gs_prim_id,
+ ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
+ ac_build_endif(&ctx->ac, 5400);
+ }
+
/* TODO: primitive culling */
build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
@@ -700,12 +724,23 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
}
}
- /* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
- if (ctx->type == PIPE_SHADER_TESS_EVAL &&
- ctx->shader->key.mono.u.vs_export_prim_id) {
+ if (ctx->shader->key.mono.u.vs_export_prim_id) {
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
outputs[i].semantic_index = 0;
- outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
+
+ if (ctx->type == PIPE_SHADER_VERTEX) {
+ /* Wait for GS stores to finish. */
+ ac_build_s_barrier(&ctx->ac);
+
+ tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring,
+ get_thread_id_in_tg(ctx));
+ outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
+ } else {
+ assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ outputs[i].values[0] = si_get_primitive_id(ctx, 0);
+ }
+
+ outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]);
for (unsigned j = 1; j < 4; j++)
outputs[i].values[j] = LLVMGetUndef(ctx->f32);
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index f32e64ea570..277a25a0b3e 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -441,6 +441,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
+ ctx->last_flatshade_first = -1;
ctx->last_sc_line_stipple = ~0;
ctx->last_vs_state = ~0;
ctx->last_ls = NULL;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a9080c93505..298e63738c4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1059,6 +1059,7 @@ struct si_context {
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
+ int last_flatshade_first;
unsigned last_sc_line_stipple;
unsigned current_vs_state;
unsigned last_vs_state;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 032b5a7bd8b..be3b897c791 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5228,10 +5228,20 @@ static bool si_shader_binary_open(struct si_screen *screen,
esgs_ring_size = shader->gs_info.esgs_ring_size;;
}
- if (sel && shader->key.as_ngg && sel->so.num_outputs) {
- unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
- esgs_ring_size = MAX2(esgs_ring_size,
- shader->ngg.max_out_verts * esgs_vertex_bytes);
+ if (sel && shader->key.as_ngg) {
+ if (sel->so.num_outputs) {
+ unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
+ esgs_ring_size = MAX2(esgs_ring_size,
+ shader->ngg.max_out_verts * esgs_vertex_bytes);
+ }
+
+ /* GS stores Primitive IDs into LDS at the address corresponding
+ * to the provoking vertex. All vertex threads load and export
+ * PrimitiveID for their thread.
+ */
+ if (sel->type == PIPE_SHADER_VERTEX &&
+ shader->key.mono.u.vs_export_prim_id)
+ esgs_ring_size = MAX2(esgs_ring_size, shader->ngg.max_out_verts * 4);
}
if (esgs_ring_size) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b545bf1bc23..801895b240c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -241,13 +241,14 @@ enum {
};
/* Fields of driver-defined VS state SGPR. */
-/* Clamp vertex color output (only used in VS as VS). */
#define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x) & 0x1) << 0)
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1)
#define C_VS_STATE_INDEXED 0xFFFFFFFD
#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2)
#define C_VS_STATE_OUTPRIM 0xFFFFFFF3
+#define S_VS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x) & 0x3) << 4)
+#define C_VS_STATE_PROVOKING_VTX_INDEX 0xFFFFFFCF
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e9388e6252c..9f3e08675ac 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -892,6 +892,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->clamp_fragment_color = state->clamp_fragment_color;
rs->clamp_vertex_color = state->clamp_vertex_color;
rs->flatshade = state->flatshade;
+ rs->flatshade_first = state->flatshade_first;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->rasterizer_discard = state->rasterizer_discard;
rs->pa_sc_line_stipple = state->line_stipple_enable ?
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 4493969037c..91b4f1ea13e 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -74,6 +74,7 @@ struct si_state_rasterizer {
unsigned clip_plane_enable:8;
unsigned half_pixel_center:1;
unsigned flatshade:1;
+ unsigned flatshade_first:1;
unsigned two_side:1;
unsigned multisample_enable:1;
unsigned force_persample_interp:1;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index eddfdd65da2..2f142bc6781 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -586,7 +586,9 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
if (likely(rast_prim == sctx->last_rast_prim &&
- rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
+ rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
+ (sctx->chip_class <= GFX9 ||
+ rs->flatshade_first == sctx->last_flatshade_first)))
return;
if (util_prim_is_lines(rast_prim)) {
@@ -599,9 +601,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
sctx->context_roll = true;
}
+ unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+
if (rast_prim != sctx->last_rast_prim &&
(sctx->ngg || sctx->gs_shader.cso)) {
- unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
sctx->context_roll = true;
@@ -611,8 +614,15 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
}
}
+ if (sctx->chip_class >= GFX10) {
+ unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
+ sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
+ sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
+ }
+
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
+ sctx->last_flatshade_first = rs->flatshade_first;
}
static void si_emit_vs_state(struct si_context *sctx,