diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state_shaders.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 281 |
1 file changed, 203 insertions, 78 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index a09f588b356..b5e14ead160 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -28,6 +28,7 @@ #include "si_pipe.h" #include "si_shader.h" #include "sid.h" +#include "radeon/r600_cs.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" @@ -665,8 +666,16 @@ static void *si_create_shader_state(struct pipe_context *ctx, struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); int i; + if (!sel) + return NULL; + sel->type = pipe_shader_type; sel->tokens = tgsi_dup_tokens(state->tokens); + if (!sel->tokens) { + FREE(sel); + return NULL; + } + sel->so = state->stream_output; tgsi_scan_shader(state->tokens, &sel->info); p_atomic_inc(&sscreen->b.num_shaders_created); @@ -679,6 +688,8 @@ static void *si_create_shader_state(struct pipe_context *ctx, sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; sel->gs_num_invocations = sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; + sel->gsvs_itemsize = sel->info.num_outputs * 16 * + sel->gs_max_out_vertices; for (i = 0; i < sel->info.num_inputs; i++) { unsigned name = sel->info.input_semantic_name[i]; @@ -713,10 +724,24 @@ static void *si_create_shader_state(struct pipe_context *ctx, } } break; + case PIPE_SHADER_FRAGMENT: + for (i = 0; i < sel->info.num_outputs; i++) { + unsigned name = sel->info.output_semantic_name[i]; + unsigned index = sel->info.output_semantic_index[i]; + + if (name == TGSI_SEMANTIC_COLOR) + sel->ps_colors_written |= 1 << index; + } + break; } if (sscreen->b.debug_flags & DBG_PRECOMPILE) - si_shader_select(ctx, sel); + if (si_shader_select(ctx, sel)) { + fprintf(stderr, "radeonsi: can't create a shader\n"); + tgsi_free_tokens(sel->tokens); + FREE(sel); + return NULL; + } return sel; } @@ -751,6 +776,25 @@ static void *si_create_tes_state(struct pipe_context *ctx, return si_create_shader_state(ctx, state, 
PIPE_SHADER_TESS_EVAL); } +/** + * Normally, we only emit 1 viewport and 1 scissor if no shader is using + * the VIEWPORT_INDEX output, and emitting the other viewports and scissors + * is delayed. When a shader with VIEWPORT_INDEX appears, this should be + * called to emit the rest. + */ +static void si_update_viewports_and_scissors(struct si_context *sctx) +{ + struct tgsi_shader_info *info = si_get_vs_info(sctx); + + if (!info || !info->writes_viewport_index) + return; + + if (sctx->scissors.dirty_mask) + si_mark_atom_dirty(sctx, &sctx->scissors.atom); + if (sctx->viewports.dirty_mask) + si_mark_atom_dirty(sctx, &sctx->viewports.atom); +} + static void si_bind_vs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -761,6 +805,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) sctx->vs_shader = sel; si_mark_atom_dirty(sctx, &sctx->clip_regs); + si_update_viewports_and_scissors(sctx); } static void si_bind_gs_shader(struct pipe_context *ctx, void *state) @@ -778,6 +823,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) if (enable_changed) si_shader_change_notify(sctx); + si_update_viewports_and_scissors(sctx); } static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) @@ -812,6 +858,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_shader_change_notify(sctx); sctx->last_tes_sh_base = -1; /* invalidate derived tess state */ } + si_update_viewports_and_scissors(sctx); } static void si_make_dummy_ps(struct si_context *sctx) @@ -840,6 +887,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) } sctx->ps_shader = sel; + si_mark_atom_dirty(sctx, &sctx->cb_target_mask); } static void si_delete_shader_selector(struct pipe_context *ctx, @@ -946,14 +994,19 @@ static void si_delete_tes_shader(struct pipe_context *ctx, void *state) si_delete_shader_selector(ctx, sel); } -static void si_update_spi_map(struct si_context 
*sctx) +static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) { + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader->current; struct si_shader *vs = si_get_vs_state(sctx); struct tgsi_shader_info *psinfo = &ps->selector->info; struct tgsi_shader_info *vsinfo = &vs->selector->info; - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - unsigned i, j, tmp; + unsigned i, j, tmp, num_written = 0; + + if (!ps->nparam) + return; + + radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam); for (i = 0; i < psinfo->num_inputs; i++) { unsigned name = psinfo->input_semantic_name[i]; @@ -997,9 +1050,9 @@ bcolor: tmp = S_028644_OFFSET(0x20); } - si_pm4_set_reg(pm4, - R_028644_SPI_PS_INPUT_CNTL_0 + param_offset * 4, - tmp); + assert(param_offset == num_written); + radeon_emit(cs, tmp); + num_written++; if (name == TGSI_SEMANTIC_COLOR && ps->key.ps.color_two_side) { @@ -1008,8 +1061,7 @@ bcolor: goto bcolor; } } - - si_pm4_set_state(sctx, spi, pm4); + assert(ps->nparam == num_written); } /* Initialize state related to ESGS / GSVS ring buffers */ @@ -1018,15 +1070,21 @@ static void si_init_gs_rings(struct si_context *sctx) unsigned esgs_ring_size = 128 * 1024; unsigned gsvs_ring_size = 60 * 1024 * 1024; - assert(!sctx->gs_rings); - sctx->gs_rings = CALLOC_STRUCT(si_pm4_state); + assert(!sctx->esgs_ring && !sctx->gsvs_ring); sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, esgs_ring_size); + if (!sctx->esgs_ring) + return; sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, gsvs_ring_size); + if (!sctx->gsvs_ring) { + pipe_resource_reference(&sctx->esgs_ring, NULL); + return; + } + /* Append these registers to the init config state. 
*/ if (sctx->b.chip_class >= CIK) { if (sctx->b.chip_class >= VI) { /* The maximum sizes are 63.999 MB on VI, because @@ -1034,17 +1092,24 @@ static void si_init_gs_rings(struct si_context *sctx) assert(esgs_ring_size / 256 < (1 << 18)); assert(gsvs_ring_size / 256 < (1 << 18)); } - si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE, + si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE, esgs_ring_size / 256); - si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE, + si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE, gsvs_ring_size / 256); } else { - si_pm4_set_reg(sctx->gs_rings, R_0088C8_VGT_ESGS_RING_SIZE, + si_pm4_set_reg(sctx->init_config, R_0088C8_VGT_ESGS_RING_SIZE, esgs_ring_size / 256); - si_pm4_set_reg(sctx->gs_rings, R_0088CC_VGT_GSVS_RING_SIZE, + si_pm4_set_reg(sctx->init_config, R_0088CC_VGT_GSVS_RING_SIZE, gsvs_ring_size / 256); } + /* Flush the context to re-emit the init_config state. + * This is done only once in a lifetime of a context. 
+ */ + si_pm4_upload_indirect_buffer(sctx, sctx->init_config); + sctx->b.initial_gfx_cs_size = 0; /* force flush */ + si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL); + si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS, sctx->esgs_ring, 0, esgs_ring_size, true, true, 4, 64, 0); @@ -1058,11 +1123,14 @@ static void si_init_gs_rings(struct si_context *sctx) static void si_update_gs_rings(struct si_context *sctx) { - unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16; - unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices; - unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out; + unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize; uint64_t offset; + if (gsvs_itemsize == sctx->last_gsvs_itemsize) + return; + + sctx->last_gsvs_itemsize = gsvs_itemsize; + si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS, sctx->gsvs_ring, gsvs_itemsize, 64, true, true, 4, 16, 0); @@ -1081,17 +1149,19 @@ static void si_update_gs_rings(struct si_context *sctx) si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3, sctx->gsvs_ring, gsvs_itemsize, 64, true, true, 4, 16, offset); - } + /** - * @returns 1 if \p sel has been updated to use a new scratch buffer and 0 - * otherwise. + * @returns 1 if \p sel has been updated to use a new scratch buffer + * 0 if not + * < 0 if there was a failure */ -static unsigned si_update_scratch_buffer(struct si_context *sctx, +static int si_update_scratch_buffer(struct si_context *sctx, struct si_shader_selector *sel) { struct si_shader *shader; uint64_t scratch_va = sctx->scratch_buffer->gpu_address; + int r; if (!sel) return 0; @@ -1112,7 +1182,9 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx, si_shader_apply_scratch_relocs(sctx, shader, scratch_va); /* Replace the shader bo with a new bo that has the relocs applied. 
*/ - si_shader_binary_upload(sctx->screen, shader); + r = si_shader_binary_upload(sctx->screen, shader); + if (r) + return r; /* Update the shader state to use the new shader bo. */ si_shader_init_pm4_state(shader); @@ -1151,7 +1223,7 @@ static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx) return bytes; } -static void si_update_spi_tmpring_size(struct si_context *sctx) +static bool si_update_spi_tmpring_size(struct si_context *sctx) { unsigned current_scratch_buffer_size = si_get_current_scratch_buffer_size(sctx); @@ -1159,6 +1231,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx) si_get_max_scratch_bytes_per_wave(sctx); unsigned scratch_needed_size = scratch_bytes_per_wave * sctx->scratch_waves; + int r; if (scratch_needed_size > 0) { @@ -1171,6 +1244,9 @@ static void si_update_spi_tmpring_size(struct si_context *sctx) sctx->scratch_buffer = si_resource_create_custom(&sctx->screen->b.b, PIPE_USAGE_DEFAULT, scratch_needed_size); + if (!sctx->scratch_buffer) + return false; + sctx->emit_scratch_reloc = true; } /* Update the shaders, so they are using the latest scratch. The @@ -1178,31 +1254,57 @@ static void si_update_spi_tmpring_size(struct si_context *sctx) * last used, so we still need to try to update them, even if * they require scratch buffers smaller than the current size. 
*/ - if (si_update_scratch_buffer(sctx, sctx->ps_shader)) + r = si_update_scratch_buffer(sctx, sctx->ps_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); - if (si_update_scratch_buffer(sctx, sctx->gs_shader)) + + r = si_update_scratch_buffer(sctx, sctx->gs_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); - if (si_update_scratch_buffer(sctx, sctx->tcs_shader)) + + r = si_update_scratch_buffer(sctx, sctx->tcs_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4); /* VS can be bound as LS, ES, or VS. */ if (sctx->tes_shader) { - if (si_update_scratch_buffer(sctx, sctx->vs_shader)) + r = si_update_scratch_buffer(sctx, sctx->vs_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4); } else if (sctx->gs_shader) { - if (si_update_scratch_buffer(sctx, sctx->vs_shader)) + r = si_update_scratch_buffer(sctx, sctx->vs_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4); } else { - if (si_update_scratch_buffer(sctx, sctx->vs_shader)) + r = si_update_scratch_buffer(sctx, sctx->vs_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); } /* TES can be bound as ES or VS. 
*/ if (sctx->gs_shader) { - if (si_update_scratch_buffer(sctx, sctx->tes_shader)) + r = si_update_scratch_buffer(sctx, sctx->tes_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4); } else { - if (si_update_scratch_buffer(sctx, sctx->tes_shader)) + r = si_update_scratch_buffer(sctx, sctx->tes_shader); + if (r < 0) + return false; + if (r == 1) si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4); } } @@ -1213,40 +1315,44 @@ static void si_update_spi_tmpring_size(struct si_context *sctx) sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) | S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10); + return true; } static void si_init_tess_factor_ring(struct si_context *sctx) { - assert(!sctx->tf_state); - sctx->tf_state = CALLOC_STRUCT(si_pm4_state); + assert(!sctx->tf_ring); sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, 32768 * sctx->screen->b.info.max_se); - sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0, - sctx->tf_ring->width0, fui(0), false); + if (!sctx->tf_ring) + return; + assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0); + /* Append these registers to the init config state. 
*/ if (sctx->b.chip_class >= CIK) { - si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE, + si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(sctx->tf_ring->width0 / 4)); - si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE, + si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); } else { - si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE, + si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(sctx->tf_ring->width0 / 4)); - si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE, + si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); } - si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring), - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); - si_pm4_bind_state(sctx, tf_ring, sctx->tf_state); + + /* Flush the context to re-emit the init_config state. + * This is done only once in a lifetime of a context. 
+ */ + si_pm4_upload_indirect_buffer(sctx, sctx->init_config); + sctx->b.initial_gfx_cs_size = 0; /* force flush */ + si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL); si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL, SI_RING_TESS_FACTOR, sctx->tf_ring, 0, sctx->tf_ring->width0, false, false, 0, 0, 0); - - sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; } /** @@ -1280,7 +1386,6 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx) sctx->fixed_func_tcs_shader = ureg_create_shader_and_destroy(ureg, &sctx->b.b); - assert(sctx->fixed_func_tcs_shader); } static void si_update_vgt_shader_config(struct si_context *sctx) @@ -1328,32 +1433,49 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha sctx->b.streamout.stride_in_dw = shader->so.stride; } -void si_update_shaders(struct si_context *sctx) +bool si_update_shaders(struct si_context *sctx) { struct pipe_context *ctx = (struct pipe_context*)sctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + int r; /* Update stages before GS. 
*/ if (sctx->tes_shader) { - if (!sctx->tf_state) + if (!sctx->tf_ring) { si_init_tess_factor_ring(sctx); + if (!sctx->tf_ring) + return false; + } /* VS as LS */ - si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, sctx->vs_shader); + if (r) + return false; si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4); if (sctx->tcs_shader) { - si_shader_select(ctx, sctx->tcs_shader); + r = si_shader_select(ctx, sctx->tcs_shader); + if (r) + return false; si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4); } else { - if (!sctx->fixed_func_tcs_shader) + if (!sctx->fixed_func_tcs_shader) { si_generate_fixed_func_tcs(sctx); - si_shader_select(ctx, sctx->fixed_func_tcs_shader); + if (!sctx->fixed_func_tcs_shader) + return false; + } + + r = si_shader_select(ctx, sctx->fixed_func_tcs_shader); + if (r) + return false; si_pm4_bind_state(sctx, hs, sctx->fixed_func_tcs_shader->current->pm4); } - si_shader_select(ctx, sctx->tes_shader); + r = si_shader_select(ctx, sctx->tes_shader); + if (r) + return false; + if (sctx->gs_shader) { /* TES as ES */ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4); @@ -1364,50 +1486,45 @@ void si_update_shaders(struct si_context *sctx) } } else if (sctx->gs_shader) { /* VS as ES */ - si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, sctx->vs_shader); + if (r) + return false; si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4); } else { /* VS as VS */ - si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, sctx->vs_shader); + if (r) + return false; si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); si_update_so(sctx, sctx->vs_shader); } /* Update GS. 
*/ if (sctx->gs_shader) { - si_shader_select(ctx, sctx->gs_shader); + r = si_shader_select(ctx, sctx->gs_shader); + if (r) + return false; si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4); si_update_so(sctx, sctx->gs_shader); - if (!sctx->gs_rings) + if (!sctx->gsvs_ring) { si_init_gs_rings(sctx); - - if (sctx->emitted.named.gs_rings != sctx->gs_rings) - sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; - si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings); + if (!sctx->gsvs_ring) + return false; + } si_update_gs_rings(sctx); } else { - si_pm4_bind_state(sctx, gs_rings, NULL); si_pm4_bind_state(sctx, gs, NULL); si_pm4_bind_state(sctx, es, NULL); } si_update_vgt_shader_config(sctx); - si_shader_select(ctx, sctx->ps_shader); - - if (!sctx->ps_shader->current) { - struct si_shader_selector *sel; - - /* use a dummy shader if compiling the shader (variant) failed */ - si_make_dummy_ps(sctx); - sel = sctx->dummy_pixel_shader; - si_shader_select(ctx, sel); - sctx->ps_shader->current = sel->current; - } - + r = si_shader_select(ctx, sctx->ps_shader); + if (r) + return false; si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || @@ -1415,12 +1532,17 @@ void si_update_shaders(struct si_context *sctx) sctx->flatshade != rs->flatshade) { sctx->sprite_coord_enable = rs->sprite_coord_enable; sctx->flatshade = rs->flatshade; - si_update_spi_map(sctx); + si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || - si_pm4_state_changed(sctx, gs)) { - si_update_spi_tmpring_size(sctx); + if (si_pm4_state_changed(sctx, ls) || + si_pm4_state_changed(sctx, hs) || + si_pm4_state_changed(sctx, es) || + si_pm4_state_changed(sctx, gs) || + si_pm4_state_changed(sctx, vs) || + si_pm4_state_changed(sctx, ps)) { + if (!si_update_spi_tmpring_size(sctx)) + return false; } if 
(sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) { @@ -1435,10 +1557,13 @@ void si_update_shaders(struct si_context *sctx) if (sctx->b.chip_class == SI) si_mark_atom_dirty(sctx, &sctx->db_render_state); } + return true; } void si_init_shader_functions(struct si_context *sctx) { + si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); + sctx->b.b.create_vs_state = si_create_vs_state; sctx->b.b.create_tcs_state = si_create_tcs_state; sctx->b.b.create_tes_state = si_create_tes_state; |