diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 8 |
4 files changed, 38 insertions, 29 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c5f13be20b8..6372ccfcfe1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -109,7 +109,7 @@ static struct si_shader_context * si_shader_context( * less than 64, so that a 64-bit bitmask of used inputs or outputs can be * calculated. */ -static unsigned get_unique_index(unsigned semantic_name, unsigned index) +unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index) { switch (semantic_name) { case TGSI_SEMANTIC_POSITION: @@ -160,7 +160,7 @@ static unsigned get_unique_index(unsigned semantic_name, unsigned index) static int get_param_index(unsigned semantic_name, unsigned index, uint64_t mask) { - unsigned unique_index = get_unique_index(semantic_name, index); + unsigned unique_index = si_shader_io_get_unique_index(semantic_name, index); int i, param_index = 0; /* If not present... */ @@ -337,13 +337,6 @@ static void declare_input_gs( struct si_shader *shader = si_shader_ctx->shader; si_store_shader_io_attribs(shader, decl); - - if (decl->Semantic.Name != TGSI_SEMANTIC_PRIMID) { - shader->gs_used_inputs |= - 1llu << get_unique_index(decl->Semantic.Name, - decl->Semantic.Index); - shader->nparam++; - } } static LLVMValueRef fetch_input_gs( @@ -410,7 +403,7 @@ static LLVMValueRef fetch_input_gs( args[1] = vtx_offset; args[2] = lp_build_const_int32(gallivm, (get_param_index(input->name, input->sid, - shader->gs_used_inputs) * 4 + + shader->selector->gs_used_inputs) * 4 + swizzle) * 256); args[3] = uint->zero; args[4] = uint->one; /* OFFEN */ @@ -2304,7 +2297,7 @@ static void si_llvm_emit_vertex( */ can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex, lp_build_const_int32(gallivm, - shader->gs_max_out_vertices), ""); + shader->selector->gs_max_out_vertices), ""); kill = lp_build_select(&bld_base->base, can_emit, lp_build_const_float(gallivm, 1.0f), lp_build_const_float(gallivm, -1.0f)); @@ -2319,7 +2312,7 @@ static void si_llvm_emit_vertex( LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""); LLVMValueRef voffset = lp_build_const_int32(gallivm, (i * 4 + chan) * - shader->gs_max_out_vertices); + shader->selector->gs_max_out_vertices); voffset = lp_build_add(uint, voffset, gs_next_vertex); voffset = lp_build_mul_imm(uint, voffset, 4); @@ -2767,7 +2760,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, for (chan = 0; chan < 4; chan++) { args[2] = lp_build_const_int32(gallivm, (i * 4 + chan) * - gs->gs_max_out_vertices * 16 * 4); + gs->selector->gs_max_out_vertices * 16 * 4); outputs[i].values[chan] = LLVMBuildBitCast(gallivm->builder, @@ -2866,11 +2859,6 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader) si_shader_ctx.radeon_bld.load_input = declare_input_gs; bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs; bld_base->emit_epilogue = si_llvm_emit_gs_epilogue; - - shader->gs_output_prim = - sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM][0]; - shader->gs_max_out_vertices = - sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES][0]; break; case TGSI_PROCESSOR_FRAGMENT: si_shader_ctx.radeon_bld.load_input = declare_input_fs; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 827f79e8d15..c46e649fd3d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -124,6 +124,10 @@ struct si_shader_selector { /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; + + unsigned gs_output_prim; + unsigned gs_max_out_vertices; + uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */ }; union si_shader_key { @@ -171,11 +175,6 @@ struct si_shader { unsigned noutput; struct si_shader_output output[40]; - /* geometry shader properties */ - unsigned gs_output_prim; - unsigned gs_max_out_vertices; - uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */ - unsigned nparam; bool uses_instanceid; bool vs_out_misc_write; @@ -199,5 +198,6 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader); int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, LLVMModuleRef mod); void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader); +unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); #endif diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index da5fcb09423..46dbca3b200 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2210,9 +2210,8 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx, key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; if (sctx->gs_shader) { - /* At this point, the GS should be selected and compiled. */ key->vs.as_es = 1; - key->vs.gs_used_inputs = sctx->gs_shader->current->gs_used_inputs; + key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs; } } else if (sel->type == PIPE_SHADER_FRAGMENT) { if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS][0]) @@ -2305,12 +2304,34 @@ static void *si_create_shader_state(struct pipe_context *ctx, unsigned pipe_shader_type) { struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); + int i; sel->type = pipe_shader_type; sel->tokens = tgsi_dup_tokens(state->tokens); sel->so = state->stream_output; tgsi_scan_shader(state->tokens, &sel->info); + switch (pipe_shader_type) { + case PIPE_SHADER_GEOMETRY: + sel->gs_output_prim = + sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM][0]; + sel->gs_max_out_vertices = + sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES][0]; + + for (i = 0; i < sel->info.num_inputs; i++) { + unsigned name = sel->info.input_semantic_name[i]; + unsigned index = sel->info.input_semantic_index[i]; + + switch (name) { + case TGSI_SEMANTIC_PRIMID: + break; + default: + sel->gs_used_inputs |= + 1llu << si_shader_io_get_unique_index(name, index); + } + } + } + return sel; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index a47534429b8..2881199d56d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -83,7 +83,7 @@ static void si_shader_gs(struct pipe_context *ctx, struct si_shader *shader) { struct si_context *sctx = (struct si_context *)ctx; unsigned gs_vert_itemsize = shader->noutput * (16 >> 2); - unsigned gs_max_vert_out = shader->gs_max_out_vertices; + unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices; unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out; unsigned cut_mode; struct si_pm4_state *pm4; @@ -121,7 +121,7 @@ static void si_shader_gs(struct pipe_context *ctx, struct si_shader *shader) si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize); si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, - shader->nparam * (16 >> 2)); + util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2)); si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize); si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out); @@ -427,7 +427,7 @@ static bool si_update_draw_info_state(struct si_context *sctx, unsigned prim = si_conv_pipe_prim(info->mode); unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->gs_shader ? - sctx->gs_shader->current->gs_output_prim : + sctx->gs_shader->gs_output_prim : info->mode); unsigned ls_mask = 0; unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info); @@ -629,7 +629,7 @@ static void si_update_derived_state(struct si_context *sctx) si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS, sctx->gsvs_ring, - sctx->gs_shader->current->gs_max_out_vertices * + sctx->gs_shader->gs_max_out_vertices * sctx->gs_shader->current->noutput * 16, 64, true, true, 4, 16); |