diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_debug.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 21 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 31 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 44 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 279 |
9 files changed, 224 insertions, 199 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index d5c5db30029..082ea850675 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -55,11 +55,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state); util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer); - util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader); - util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader); - util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader); - util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader); - util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader); + util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso); + util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso); + util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso); + util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso); + util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso); util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements); util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask); util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]); diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 7d41e8d00e0..53062187b88 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -31,15 +31,15 @@ #include "ddebug/dd_util.h" -static void si_dump_shader(struct si_shader_selector *sel, const char *name, +static void si_dump_shader(struct si_shader_ctx_state *state, const char *name, FILE *f) { - if (!sel || !sel->current) + if (!state->cso || !state->current) return; 
fprintf(f, "%s shader disassembly:\n", name); - si_dump_shader_key(sel->type, &sel->current->key, f); - fprintf(f, "%s\n\n", sel->current->binary.disasm_string); + si_dump_shader_key(state->cso->type, &state->current->key, f); + fprintf(f, "%s\n\n", state->current->binary.disasm_string); } /* Parsed IBs are difficult to read without colors. Use "less -R file" to @@ -536,11 +536,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, if (flags & PIPE_DEBUG_DEVICE_IS_HUNG) si_dump_debug_registers(sctx, f); - si_dump_shader(sctx->vs_shader, "Vertex", f); - si_dump_shader(sctx->tcs_shader, "Tessellation control", f); - si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f); - si_dump_shader(sctx->gs_shader, "Geometry", f); - si_dump_shader(sctx->ps_shader, "Fragment", f); + si_dump_shader(&sctx->vs_shader, "Vertex", f); + si_dump_shader(&sctx->tcs_shader, "Tessellation control", f); + si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f); + si_dump_shader(&sctx->gs_shader, "Geometry", f); + si_dump_shader(&sctx->ps_shader, "Fragment", f); si_dump_last_bo_list(sctx, f); si_dump_last_ib(sctx, f); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 19dd14f9b6f..13738da5e2c 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -915,10 +915,10 @@ static void si_set_user_data_base(struct si_context *sctx, void si_shader_change_notify(struct si_context *sctx) { /* VS can be bound as VS, ES, or LS. 
*/ - if (sctx->tes_shader) + if (sctx->tes_shader.cso) si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B530_SPI_SHADER_USER_DATA_LS_0); - else if (sctx->gs_shader) + else if (sctx->gs_shader.cso) si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B330_SPI_SHADER_USER_DATA_ES_0); else @@ -926,8 +926,8 @@ void si_shader_change_notify(struct si_context *sctx) R_00B130_SPI_SHADER_USER_DATA_VS_0); /* TES can be bound as ES, VS, or not bound. */ - if (sctx->tes_shader) { - if (sctx->gs_shader) + if (sctx->tes_shader.cso) { + if (sctx->gs_shader.cso) si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, R_00B330_SPI_SHADER_USER_DATA_ES_0); else @@ -964,7 +964,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) unsigned i; uint32_t *sh_base = sctx->shader_userdata.sh_base; - if (sctx->gs_shader) { + if (sctx->gs_shader.cso) { /* The VS copy shader needs these for clipping, streamout, and rings. */ unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0; unsigned i = PIPE_SHADER_VERTEX; @@ -975,7 +975,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) /* The TESSEVAL shader needs this for the ESGS ring buffer. */ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, R_00B330_SPI_SHADER_USER_DATA_ES_0, true); - } else if (sctx->tes_shader) { + } else if (sctx->tes_shader.cso) { /* The TESSEVAL shader needs this for streamout. 
*/ si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc, R_00B130_SPI_SHADER_USER_DATA_VS_0, true); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 37e793a2204..c084f03cd25 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -57,8 +57,8 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state); if (sctx->dummy_pixel_shader) sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader); - if (sctx->fixed_func_tcs_shader) - sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader); + if (sctx->fixed_func_tcs_shader.cso) + sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush); if (sctx->custom_blend_resolve) @@ -293,6 +293,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_TEXTURE_QUERY_LOD: @@ -337,7 +338,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_VERTEXID_NOBASE: - case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2abd5b5a0c3..d7a2282952a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -152,6 +152,15 @@ struct si_viewports { struct pipe_viewport_state states[SI_MAX_VIEWPORTS]; }; +/* A shader state consists of the shader selector, which is a constant state + * object shared by 
multiple contexts and shouldn't be modified, and + * the current shader variant selected for this context. + */ +struct si_shader_ctx_state { + struct si_shader_selector *cso; + struct si_shader *current; +}; + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -162,7 +171,7 @@ struct si_context { void *pstipple_sampler_state; struct si_screen *screen; struct pipe_fence_handle *last_gfx_fence; - struct si_shader_selector *fixed_func_tcs_shader; + struct si_shader_ctx_state fixed_func_tcs_shader; LLVMTargetMachineRef tm; /* Atoms (direct states). */ @@ -199,11 +208,11 @@ struct si_context { void *dummy_pixel_shader; /* shaders */ - struct si_shader_selector *ps_shader; - struct si_shader_selector *gs_shader; - struct si_shader_selector *vs_shader; - struct si_shader_selector *tcs_shader; - struct si_shader_selector *tes_shader; + struct si_shader_ctx_state ps_shader; + struct si_shader_ctx_state gs_shader; + struct si_shader_ctx_state vs_shader; + struct si_shader_ctx_state tcs_shader; + struct si_shader_ctx_state tes_shader; struct si_cs_shader_state cs_shader_state; /* shader information */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 54dad726d01..b1076ed9183 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -179,15 +179,18 @@ struct radeon_shader_reloc; struct si_shader; +/* A shader selector is a gallium CSO and contains shader variants and + * binaries for one TGSI program. This can be shared by multiple contexts. + */ struct si_shader_selector { - struct si_shader *current; + pipe_mutex mutex; + struct si_shader *first_variant; /* immutable after the first variant */ + struct si_shader *last_variant; /* mutable */ struct tgsi_token *tokens; struct pipe_stream_output_info so; struct tgsi_shader_info info; - unsigned num_shaders; - /* PIPE_SHADER_[VERTEX|FRAGMENT|...] 
*/ unsigned type; @@ -293,24 +296,24 @@ struct si_shader { static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) { - if (sctx->gs_shader) - return &sctx->gs_shader->info; - else if (sctx->tes_shader) - return &sctx->tes_shader->info; - else if (sctx->vs_shader) - return &sctx->vs_shader->info; + if (sctx->gs_shader.cso) + return &sctx->gs_shader.cso->info; + else if (sctx->tes_shader.cso) + return &sctx->tes_shader.cso->info; + else if (sctx->vs_shader.cso) + return &sctx->vs_shader.cso->info; else return NULL; } static inline struct si_shader* si_get_vs_state(struct si_context *sctx) { - if (sctx->gs_shader) - return sctx->gs_shader->current->gs_copy_shader; - else if (sctx->tes_shader) - return sctx->tes_shader->current; + if (sctx->gs_shader.current) + return sctx->gs_shader.current->gs_copy_shader; + else if (sctx->tes_shader.current) + return sctx->tes_shader.current; else - return sctx->vs_shader->current; + return sctx->vs_shader.current; } static inline bool si_vs_exports_prim_id(struct si_shader *shader) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 2e77a3605a6..243bdc6e6d7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -266,7 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at * Reproducible with Unigine Heaven 4.0 and drirc missing. 
*/ if (blend->dual_src_blend && - (sctx->ps_shader->ps_colors_written & 0x3) != 0x3) + (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3) mask = 0; radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 5face423941..ce6c98c3124 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -109,11 +109,11 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_patches) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - struct si_shader_selector *ls = sctx->vs_shader; + struct si_shader_ctx_state *ls = &sctx->vs_shader; /* The TES pointer will only be used for sctx->last_tcs. * It would be wrong to think that TCS = TES. */ struct si_shader_selector *tcs = - sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader; + sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs; @@ -138,9 +138,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. 
*/ - num_tcs_inputs = util_last_bit64(ls->outputs_written); + num_tcs_inputs = util_last_bit64(ls->cso->outputs_written); - if (sctx->tcs_shader) { + if (sctx->tcs_shader.cso) { num_tcs_outputs = util_last_bit64(tcs->outputs_written); num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written); @@ -159,7 +159,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; - output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0; + output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0; perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; lds_size = output_patch0_offset + output_patch_size * *num_patches; @@ -231,13 +231,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, bool partial_vs_wave = false; bool partial_es_wave = false; - if (sctx->gs_shader) + if (sctx->gs_shader.cso) primgroup_size = 64; /* recommended with a GS */ - if (sctx->tes_shader) { + if (sctx->tes_shader.cso) { unsigned num_cp_out = - sctx->tcs_shader ? - sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : + sctx->tcs_shader.cso ? + sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : info->vertices_per_patch; unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out); @@ -248,8 +248,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* SWITCH_ON_EOI must be set if PrimID is used. * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. 
*/ - if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) || - sctx->tes_shader->info.uses_primid) { + if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) || + sctx->tes_shader.cso->info.uses_primid) { ia_switch_on_eoi = true; partial_es_wave = true; } @@ -258,7 +258,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, if ((sctx->b.family == CHIP_TAHITI || sctx->b.family == CHIP_PITCAIRN || sctx->b.family == CHIP_BONAIRE) && - sctx->gs_shader) + sctx->gs_shader.cso) partial_vs_wave = true; } @@ -328,11 +328,11 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx, { unsigned num_output_cp; - if (!sctx->tes_shader) + if (!sctx->tes_shader.cso) return 0; - num_output_cp = sctx->tcs_shader ? - sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : + num_output_cp = sctx->tcs_shader.cso ? + sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : info->vertices_per_patch; return S_028B58_NUM_PATCHES(num_patches) | @@ -395,7 +395,7 @@ static void si_emit_draw_registers(struct si_context *sctx, unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim); unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0; - if (sctx->tes_shader) + if (sctx->tes_shader.cso) si_emit_derived_tess_state(sctx, info, &num_patches); ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches); @@ -735,11 +735,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) (info->indexed || !info->count_from_stream_output)) return; - if (!sctx->ps_shader || !sctx->vs_shader) { + if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) { assert(0); return; } - if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) { + if (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)) { assert(0); return; } @@ -751,11 +751,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) * This must be done after si_decompress_textures, which can call * 
draw_vbo recursively, and before si_update_shaders, which uses * current_rast_prim for this draw_vbo call. */ - if (sctx->gs_shader) - sctx->current_rast_prim = sctx->gs_shader->gs_output_prim; - else if (sctx->tes_shader) + if (sctx->gs_shader.cso) + sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim; + else if (sctx->tes_shader.cso) sctx->current_rast_prim = - sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; else sctx->current_rast_prim = info->mode; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c98509bb0b9..8b26b943e00 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -523,26 +523,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; - if (sctx->tes_shader) + if (sctx->tes_shader.cso) key->vs.as_ls = 1; - else if (sctx->gs_shader) { + else if (sctx->gs_shader.cso) { key->vs.as_es = 1; - key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read; + key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read; } - if (!sctx->gs_shader && sctx->ps_shader && - sctx->ps_shader->info.uses_primid) + if (!sctx->gs_shader.cso && sctx->ps_shader.cso && + sctx->ps_shader.cso->info.uses_primid) key->vs.export_prim_id = 1; break; case PIPE_SHADER_TESS_CTRL: key->tcs.prim_mode = - sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; break; case PIPE_SHADER_TESS_EVAL: - if (sctx->gs_shader) { + if (sctx->gs_shader.cso) { key->tes.as_es = 1; - key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read; - } else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid) + key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read; + } else if (sctx->ps_shader.cso 
&& sctx->ps_shader.cso->info.uses_primid) key->tes.export_prim_id = 1; break; case PIPE_SHADER_GEOMETRY: @@ -589,11 +589,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, /* Select the hw shader variant depending on the current state. */ static int si_shader_select(struct pipe_context *ctx, - struct si_shader_selector *sel) + struct si_shader_ctx_state *state) { struct si_context *sctx = (struct si_context *)ctx; + struct si_shader_selector *sel = state->cso; + struct si_shader *current = state->current; union si_shader_key key; - struct si_shader * shader = NULL; + struct si_shader *iter, *shader = NULL; int r; si_shader_selector_key(ctx, sel, &key); @@ -602,49 +604,51 @@ static int si_shader_select(struct pipe_context *ctx, * This path is also used for most shaders that don't need multiple * variants, it will cost just a computation of the key and this * test. */ - if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { + if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0)) return 0; - } - /* lookup if we have other variants in the list */ - if (sel->num_shaders > 1) { - struct si_shader *p = sel->current, *c = p->next_variant; + pipe_mutex_lock(sel->mutex); - while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { - p = c; - c = c->next_variant; + /* Find the shader variant. */ + for (iter = sel->first_variant; iter; iter = iter->next_variant) { + /* Don't check the "current" shader. We checked it above. */ + if (current != iter && + memcmp(&iter->key, &key, sizeof(key)) == 0) { + state->current = iter; + pipe_mutex_unlock(sel->mutex); + return 0; } + } - if (c) { - p->next_variant = c->next_variant; - shader = c; - } + /* Build a new shader.
*/ + shader = CALLOC_STRUCT(si_shader); + if (!shader) { + pipe_mutex_unlock(sel->mutex); + return -ENOMEM; } + shader->selector = sel; + shader->key = key; + + r = si_shader_create(sctx->screen, sctx->tm, shader); + if (unlikely(r)) { + R600_ERR("Failed to build shader variant (type=%u) %d\n", + sel->type, r); + FREE(shader); + pipe_mutex_unlock(sel->mutex); + return r; + } + si_shader_init_pm4_state(shader); - if (shader) { - shader->next_variant = sel->current; - sel->current = shader; + if (!sel->last_variant) { + sel->first_variant = shader; + sel->last_variant = shader; } else { - shader = CALLOC(1, sizeof(struct si_shader)); - shader->selector = sel; - shader->key = key; - - shader->next_variant = sel->current; - sel->current = shader; - r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm, - shader); - if (unlikely(r)) { - R600_ERR("Failed to build shader variant (type=%u) %d\n", - sel->type, r); - sel->current = NULL; - FREE(shader); - return r; - } - si_shader_init_pm4_state(shader); - sel->num_shaders++; - p_atomic_inc(&sctx->screen->b.num_compilations); + sel->last_variant->next_variant = shader; + sel->last_variant = shader; } - + state->current = shader; + p_atomic_inc(&sctx->screen->b.num_compilations); + pipe_mutex_unlock(sel->mutex); return 0; } @@ -752,14 +756,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx, break; } - if (sscreen->b.debug_flags & DBG_PRECOMPILE) - if (si_shader_select(ctx, sel)) { + if (sscreen->b.debug_flags & DBG_PRECOMPILE) { + struct si_shader_ctx_state state = {sel}; + + if (si_shader_select(ctx, &state)) { fprintf(stderr, "radeonsi: can't create a shader\n"); tgsi_free_tokens(sel->tokens); FREE(sel); return NULL; } + } + pipe_mutex_init(sel->mutex); return sel; } @@ -787,10 +795,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - if (sctx->vs_shader == sel || !sel) + if 
(sctx->vs_shader.cso == sel || !sel) return; - sctx->vs_shader = sel; + sctx->vs_shader.cso = sel; + sctx->vs_shader.current = sel->first_variant; si_mark_atom_dirty(sctx, &sctx->clip_regs); si_update_viewports_and_scissors(sctx); } @@ -799,12 +808,13 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - bool enable_changed = !!sctx->gs_shader != !!sel; + bool enable_changed = !!sctx->gs_shader.cso != !!sel; - if (sctx->gs_shader == sel) + if (sctx->gs_shader.cso == sel) return; - sctx->gs_shader = sel; + sctx->gs_shader.cso = sel; + sctx->gs_shader.current = sel ? sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); sctx->last_rast_prim = -1; /* reset this so that it gets updated */ @@ -817,12 +827,13 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - bool enable_changed = !!sctx->tcs_shader != !!sel; + bool enable_changed = !!sctx->tcs_shader.cso != !!sel; - if (sctx->tcs_shader == sel) + if (sctx->tcs_shader.cso == sel) return; - sctx->tcs_shader = sel; + sctx->tcs_shader.cso = sel; + sctx->tcs_shader.current = sel ? sel->first_variant : NULL; if (enable_changed) sctx->last_tcs = NULL; /* invalidate derived tess state */ @@ -832,12 +843,13 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - bool enable_changed = !!sctx->tes_shader != !!sel; + bool enable_changed = !!sctx->tes_shader.cso != !!sel; - if (sctx->tes_shader == sel) + if (sctx->tes_shader.cso == sel) return; - sctx->tes_shader = sel; + sctx->tes_shader.cso = sel; + sctx->tes_shader.current = sel ? 
sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); sctx->last_rast_prim = -1; /* reset this so that it gets updated */ @@ -864,7 +876,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) struct si_shader_selector *sel = state; /* skip if supplied shader is one already in use */ - if (sctx->ps_shader == sel) + if (sctx->ps_shader.cso == sel) return; /* use a dummy shader if binding a NULL shader */ @@ -873,7 +885,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) sel = sctx->dummy_pixel_shader; } - sctx->ps_shader = sel; + sctx->ps_shader.cso = sel; + sctx->ps_shader.current = sel->first_variant; si_mark_atom_dirty(sctx, &sctx->cb_target_mask); } @@ -881,8 +894,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = (struct si_shader_selector *)state; - struct si_shader *p = sel->current, *c; - struct si_shader_selector **current_shader[SI_NUM_SHADERS] = { + struct si_shader *p = sel->first_variant, *c; + struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = { [PIPE_SHADER_VERTEX] = &sctx->vs_shader, [PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader, [PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader, @@ -890,8 +903,10 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) [PIPE_SHADER_FRAGMENT] = &sctx->ps_shader, }; - if (*current_shader[sel->type] == sel) - *current_shader[sel->type] = NULL; + if (current_shader[sel->type]->cso == sel) { + current_shader[sel->type]->cso = NULL; + current_shader[sel->type]->current = NULL; + } while (p) { c = p->next_variant; @@ -927,6 +942,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) p = c; } + pipe_mutex_destroy(sel->mutex); free(sel->tokens); free(sel); } @@ -934,7 +950,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) static void si_emit_spi_map(struct si_context 
*sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - struct si_shader *ps = sctx->ps_shader->current; + struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); struct tgsi_shader_info *psinfo = &ps->selector->info; struct tgsi_shader_info *vsinfo = &vs->selector->info; @@ -1004,7 +1020,7 @@ bcolor: static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - struct si_shader *ps = sctx->ps_shader->current; + struct si_shader *ps = sctx->ps_shader.current; unsigned input_ena = ps->spi_ps_input_ena; /* we need to enable at least one of them, otherwise we hang the GPU */ @@ -1133,7 +1149,7 @@ static void si_init_gs_rings(struct si_context *sctx) static void si_update_gs_rings(struct si_context *sctx) { - unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize; + unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize; uint64_t offset; if (gsvs_itemsize == sctx->last_gsvs_itemsize) @@ -1167,17 +1183,14 @@ static void si_update_gs_rings(struct si_context *sctx) * < 0 if there was a failure */ static int si_update_scratch_buffer(struct si_context *sctx, - struct si_shader_selector *sel) + struct si_shader *shader) { - struct si_shader *shader; uint64_t scratch_va = sctx->scratch_buffer->gpu_address; int r; - if (!sel) + if (!shader) return 0; - shader = sel->current; - /* This shader doesn't need a scratch buffer */ if (shader->scratch_bytes_per_wave == 0) return 0; @@ -1209,20 +1222,20 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx) return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0; } -static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel) +static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader) { - return sel ? sel->current->scratch_bytes_per_wave : 0; + return shader ? 
shader->scratch_bytes_per_wave : 0; } static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx) { unsigned bytes = 0; - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current)); return bytes; } @@ -1256,46 +1269,46 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) * last used, so we still need to try to update them, even if * they require scratch buffers smaller than the current size. 
*/ - r = si_update_scratch_buffer(sctx, sctx->ps_shader); + r = si_update_scratch_buffer(sctx, sctx->ps_shader.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); - r = si_update_scratch_buffer(sctx, sctx->gs_shader); + r = si_update_scratch_buffer(sctx, sctx->gs_shader.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); + si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4); - r = si_update_scratch_buffer(sctx, sctx->tcs_shader); + r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4); + si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4); /* VS can be bound as LS, ES, or VS. */ - r = si_update_scratch_buffer(sctx, sctx->vs_shader); + r = si_update_scratch_buffer(sctx, sctx->vs_shader.current); if (r < 0) return false; if (r == 1) { - if (sctx->tes_shader) - si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4); - else if (sctx->gs_shader) - si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4); + if (sctx->tes_shader.current) + si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4); + else if (sctx->gs_shader.current) + si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4); else - si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); + si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4); } /* TES can be bound as ES or VS. 
*/ - r = si_update_scratch_buffer(sctx, sctx->tes_shader); + r = si_update_scratch_buffer(sctx, sctx->tes_shader.current); if (r < 0) return false; if (r == 1) { - if (sctx->gs_shader) - si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4); + if (sctx->gs_shader.current) + si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4); else - si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4); + si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); } } @@ -1361,7 +1374,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx) if (!ureg) return; /* if we get here, we're screwed */ - assert(!sctx->fixed_func_tcs_shader); + assert(!sctx->fixed_func_tcs_shader.cso); ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF); const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0), @@ -1376,7 +1389,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx) ureg_MOV(ureg, tessinner, const1); ureg_END(ureg); - sctx->fixed_func_tcs_shader = + sctx->fixed_func_tcs_shader.cso = ureg_create_shader_and_destroy(ureg, &sctx->b.b); } @@ -1384,7 +1397,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx) { /* Calculate the index of the config. 
* 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */ - unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader; + unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso; struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index]; if (!*pm4) { @@ -1392,17 +1405,17 @@ static void si_update_vgt_shader_config(struct si_context *sctx) *pm4 = CALLOC_STRUCT(si_pm4_state); - if (sctx->tes_shader) { + if (sctx->tes_shader.cso) { stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1); - if (sctx->gs_shader) + if (sctx->gs_shader.cso) stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1) | S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); else stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); - } else if (sctx->gs_shader) { + } else if (sctx->gs_shader.cso) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1) | S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); @@ -1432,7 +1445,7 @@ bool si_update_shaders(struct si_context *sctx) int r; /* Update stages before GS. 
*/ - if (sctx->tes_shader) { + if (sctx->tes_shader.cso) { if (!sctx->tf_ring) { si_init_tess_factor_ring(sctx); if (!sctx->tf_ring) @@ -1440,65 +1453,65 @@ bool si_update_shaders(struct si_context *sctx) } /* VS as LS */ - r = si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, &sctx->vs_shader); if (r) return false; - si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4); + si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4); - if (sctx->tcs_shader) { - r = si_shader_select(ctx, sctx->tcs_shader); + if (sctx->tcs_shader.cso) { + r = si_shader_select(ctx, &sctx->tcs_shader); if (r) return false; - si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4); + si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4); } else { - if (!sctx->fixed_func_tcs_shader) { + if (!sctx->fixed_func_tcs_shader.cso) { si_generate_fixed_func_tcs(sctx); - if (!sctx->fixed_func_tcs_shader) + if (!sctx->fixed_func_tcs_shader.cso) return false; } - r = si_shader_select(ctx, sctx->fixed_func_tcs_shader); + r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader); if (r) return false; si_pm4_bind_state(sctx, hs, - sctx->fixed_func_tcs_shader->current->pm4); + sctx->fixed_func_tcs_shader.current->pm4); } - r = si_shader_select(ctx, sctx->tes_shader); + r = si_shader_select(ctx, &sctx->tes_shader); if (r) return false; - if (sctx->gs_shader) { + if (sctx->gs_shader.cso) { /* TES as ES */ - si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4); + si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4); } else { /* TES as VS */ - si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4); - si_update_so(sctx, sctx->tes_shader); + si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); + si_update_so(sctx, sctx->tes_shader.cso); } - } else if (sctx->gs_shader) { + } else if (sctx->gs_shader.cso) { /* VS as ES */ - r = si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, &sctx->vs_shader); if (r) return false; - si_pm4_bind_state(sctx, 
es, sctx->vs_shader->current->pm4); + si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4); } else { /* VS as VS */ - r = si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, &sctx->vs_shader); if (r) return false; - si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); - si_update_so(sctx, sctx->vs_shader); + si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4); + si_update_so(sctx, sctx->vs_shader.cso); } /* Update GS. */ - if (sctx->gs_shader) { - r = si_shader_select(ctx, sctx->gs_shader); + if (sctx->gs_shader.cso) { + r = si_shader_select(ctx, &sctx->gs_shader); if (r) return false; - si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); - si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4); - si_update_so(sctx, sctx->gs_shader); + si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4); + si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4); + si_update_so(sctx, sctx->gs_shader.cso); if (!sctx->gsvs_ring) { si_init_gs_rings(sctx); @@ -1514,10 +1527,10 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); - r = si_shader_select(ctx, sctx->ps_shader); + r = si_shader_select(ctx, &sctx->ps_shader); if (r) return false; - si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || @@ -1543,13 +1556,13 @@ bool si_update_shaders(struct si_context *sctx) return false; } - if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) { - sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control; + if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { + sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; si_mark_atom_dirty(sctx, &sctx->db_render_state); } - if (sctx->smoothing_enabled != 
sctx->ps_shader->current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing; + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->msaa_config); if (sctx->b.chip_class == SI) |