summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 24
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 10
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 25
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 8
4 files changed, 38 insertions, 29 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c5f13be20b8..6372ccfcfe1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -109,7 +109,7 @@ static struct si_shader_context * si_shader_context(
* less than 64, so that a 64-bit bitmask of used inputs or outputs can be
* calculated.
*/
-static unsigned get_unique_index(unsigned semantic_name, unsigned index)
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
{
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
@@ -160,7 +160,7 @@ static unsigned get_unique_index(unsigned semantic_name, unsigned index)
static int get_param_index(unsigned semantic_name, unsigned index,
uint64_t mask)
{
- unsigned unique_index = get_unique_index(semantic_name, index);
+ unsigned unique_index = si_shader_io_get_unique_index(semantic_name, index);
int i, param_index = 0;
/* If not present... */
@@ -337,13 +337,6 @@ static void declare_input_gs(
struct si_shader *shader = si_shader_ctx->shader;
si_store_shader_io_attribs(shader, decl);
-
- if (decl->Semantic.Name != TGSI_SEMANTIC_PRIMID) {
- shader->gs_used_inputs |=
- 1llu << get_unique_index(decl->Semantic.Name,
- decl->Semantic.Index);
- shader->nparam++;
- }
}
static LLVMValueRef fetch_input_gs(
@@ -410,7 +403,7 @@ static LLVMValueRef fetch_input_gs(
args[1] = vtx_offset;
args[2] = lp_build_const_int32(gallivm,
(get_param_index(input->name, input->sid,
- shader->gs_used_inputs) * 4 +
+ shader->selector->gs_used_inputs) * 4 +
swizzle) * 256);
args[3] = uint->zero;
args[4] = uint->one; /* OFFEN */
@@ -2304,7 +2297,7 @@ static void si_llvm_emit_vertex(
*/
can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
lp_build_const_int32(gallivm,
- shader->gs_max_out_vertices), "");
+ shader->selector->gs_max_out_vertices), "");
kill = lp_build_select(&bld_base->base, can_emit,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
@@ -2319,7 +2312,7 @@ static void si_llvm_emit_vertex(
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
LLVMValueRef voffset =
lp_build_const_int32(gallivm, (i * 4 + chan) *
- shader->gs_max_out_vertices);
+ shader->selector->gs_max_out_vertices);
voffset = lp_build_add(uint, voffset, gs_next_vertex);
voffset = lp_build_mul_imm(uint, voffset, 4);
@@ -2767,7 +2760,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
for (chan = 0; chan < 4; chan++) {
args[2] = lp_build_const_int32(gallivm,
(i * 4 + chan) *
- gs->gs_max_out_vertices * 16 * 4);
+ gs->selector->gs_max_out_vertices * 16 * 4);
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
@@ -2866,11 +2859,6 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
si_shader_ctx.radeon_bld.load_input = declare_input_gs;
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
-
- shader->gs_output_prim =
- sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM][0];
- shader->gs_max_out_vertices =
- sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES][0];
break;
case TGSI_PROCESSOR_FRAGMENT:
si_shader_ctx.radeon_bld.load_input = declare_input_fs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 827f79e8d15..c46e649fd3d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -124,6 +124,10 @@ struct si_shader_selector {
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
+
+ unsigned gs_output_prim;
+ unsigned gs_max_out_vertices;
+ uint64_t gs_used_inputs; /* mask of "si_shader_io_get_unique_index" bits */
};
union si_shader_key {
@@ -171,11 +175,6 @@ struct si_shader {
unsigned noutput;
struct si_shader_output output[40];
- /* geometry shader properties */
- unsigned gs_output_prim;
- unsigned gs_max_out_vertices;
- uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */
-
unsigned nparam;
bool uses_instanceid;
bool vs_out_misc_write;
@@ -199,5 +198,6 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader);
int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMModuleRef mod);
void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index da5fcb09423..46dbca3b200 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2210,9 +2210,8 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor;
if (sctx->gs_shader) {
- /* At this point, the GS should be selected and compiled. */
key->vs.as_es = 1;
- key->vs.gs_used_inputs = sctx->gs_shader->current->gs_used_inputs;
+ key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
}
} else if (sel->type == PIPE_SHADER_FRAGMENT) {
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS][0])
@@ -2305,12 +2304,34 @@ static void *si_create_shader_state(struct pipe_context *ctx,
unsigned pipe_shader_type)
{
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
+ int i;
sel->type = pipe_shader_type;
sel->tokens = tgsi_dup_tokens(state->tokens);
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
+ switch (pipe_shader_type) {
+ case PIPE_SHADER_GEOMETRY:
+ sel->gs_output_prim =
+ sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM][0];
+ sel->gs_max_out_vertices =
+ sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES][0];
+
+ for (i = 0; i < sel->info.num_inputs; i++) {
+ unsigned name = sel->info.input_semantic_name[i];
+ unsigned index = sel->info.input_semantic_index[i];
+
+ switch (name) {
+ case TGSI_SEMANTIC_PRIMID:
+ break;
+ default:
+ sel->gs_used_inputs |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ }
+ }
+ }
+
return sel;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index a47534429b8..2881199d56d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -83,7 +83,7 @@ static void si_shader_gs(struct pipe_context *ctx, struct si_shader *shader)
{
struct si_context *sctx = (struct si_context *)ctx;
unsigned gs_vert_itemsize = shader->noutput * (16 >> 2);
- unsigned gs_max_vert_out = shader->gs_max_out_vertices;
+ unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
unsigned cut_mode;
struct si_pm4_state *pm4;
@@ -121,7 +121,7 @@ static void si_shader_gs(struct pipe_context *ctx, struct si_shader *shader)
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- shader->nparam * (16 >> 2));
+ util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -427,7 +427,7 @@ static bool si_update_draw_info_state(struct si_context *sctx,
unsigned prim = si_conv_pipe_prim(info->mode);
unsigned gs_out_prim =
si_conv_prim_to_gs_out(sctx->gs_shader ?
- sctx->gs_shader->current->gs_output_prim :
+ sctx->gs_shader->gs_output_prim :
info->mode);
unsigned ls_mask = 0;
unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info);
@@ -629,7 +629,7 @@ static void si_update_derived_state(struct si_context *sctx)
si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
sctx->gsvs_ring,
- sctx->gs_shader->current->gs_max_out_vertices *
+ sctx->gs_shader->gs_max_out_vertices *
sctx->gs_shader->current->noutput * 16,
64, true, true, 4, 16);