diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/cik_sdma.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_dma.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 33 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 67 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 95 |
10 files changed, 182 insertions, 92 deletions
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 6454b8ce8c0..e53af1dd6b5 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -242,7 +242,8 @@ void cik_sdma_copy(struct pipe_context *ctx, if (src->format != dst->format || rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || - (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) { + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || + rdst->dcc_buffer || rsrc->dcc_buffer) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 082ea850675..fce014a1e6b 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx, assert(view); tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size); + assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer); si_blit_decompress_color(&sctx->b.b, tex, view->u.tex.first_level, view->u.tex.last_level, @@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx, si_blit_decompress_depth_in_place(sctx, rtex, true, level, level, first_layer, last_layer); - } else if (rtex->fmask.size || rtex->cmask.size) { + } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) { si_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } @@ -507,7 +507,7 @@ void si_resource_copy_region(struct pipe_context *ctx, util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); util_blitter_default_src_texture(&src_templ, src, src_level); - if (util_format_is_compressed(src->format) && + if (util_format_is_compressed(src->format) || util_format_is_compressed(dst->format)) { unsigned blocksize = util_format_get_blocksize(src->format); @@ -536,7 +536,7 @@ void si_resource_copy_region(struct pipe_context *ctx, src_force_level = src_level; } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) || /* also *8_SNORM has precision issues, use UNORM instead */ - util_format_is_snorm(src->format)) { + util_format_is_snorm8(src->format)) { if (util_format_is_subsampled_422(src->format)) { src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; @@ -675,7 +675,8 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, info->src.box.depth == 1 && dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D && !(dst->surface.flags & RADEON_SURF_SCANOUT) && - (!dst->cmask.size || !dst->dirty_level_mask) /* dst cannot be fast-cleared */) { + (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */ + !dst->dcc_buffer) { si_blitter_begin(ctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 13738da5e2c..a8ff6f27319 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -181,6 +181,11 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, rview->resource, RADEON_USAGE_READ, r600_get_sampler_view_priority(rview->resource)); + if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) + radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, + rview->dcc_buffer, RADEON_USAGE_READ, + RADEON_PRIO_DCC); + pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot*8, view_desc, 8*4); views->desc.enabled_mask |= 1llu << slot; @@ -229,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->depth_texture_mask &= ~(1 << slot); } - if (rtex->cmask.size || rtex->fmask.size) { + if (rtex->cmask.size || rtex->fmask.size || + (rtex->dcc_buffer && rtex->dirty_level_mask)) { samplers->compressed_colortex_mask |= 1 << slot; } else { samplers->compressed_colortex_mask &= ~(1 << slot); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 31b0b41e5a4..581e89f42d8 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -248,7 +248,8 @@ void si_dma_copy(struct pipe_context *ctx, if (src->format != dst->format || src_box->depth > 1 || (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || rdst->cmask.size || rdst->fmask.size || - rsrc->cmask.size || rsrc->fmask.size) { + rsrc->cmask.size || rsrc->fmask.size || + rdst->dcc_buffer || rsrc->dcc_buffer) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5f910c95ef3..60baad3d13c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -55,8 +55,6 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->pstipple_sampler_state) sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state); - if (sctx->dummy_pixel_shader) - sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader); if (sctx->fixed_func_tcs_shader.cso) sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) @@ -300,6 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -578,6 +577,33 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen) return true; } +static bool si_init_gs_info(struct si_screen *sscreen) +{ + switch (sscreen->b.family) { + case CHIP_OLAND: + case CHIP_HAINAN: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + case CHIP_ICELAND: + case CHIP_CARRIZO: + case CHIP_STONEY: + sscreen->gs_table_depth = 16; + return true; + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_TONGA: + case CHIP_FIJI: + sscreen->gs_table_depth = 32; + return true; + default: + return false; + } +} + struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); @@ -595,7 +621,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_initialize_pipe_config(sscreen)) { + !si_initialize_pipe_config(sscreen) || + !si_init_gs_info(sscreen)) { FREE(sscreen); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d7a2282952a..42cd8803c36 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -42,6 +42,7 @@ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 +#define SI_GS_PER_ES 128 /* Instruction cache. */ #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0) @@ -85,6 +86,7 @@ struct si_compute; struct si_screen { struct r600_common_screen b; + unsigned gs_table_depth; }; struct si_blend_color { @@ -96,6 +98,7 @@ struct si_sampler_view { struct pipe_sampler_view base; struct list_head list; struct r600_resource *resource; + struct r600_resource *dcc_buffer; /* [0..7] = image descriptor * [4..7] = buffer descriptor */ uint32_t state[8]; @@ -203,9 +206,6 @@ struct si_context { struct si_pm4_state *init_config; bool init_config_has_vgt_flush; struct si_pm4_state *vgt_shader_config[4]; - /* With rasterizer discard, there doesn't have to be a pixel shader. - * In that case, we bind this one: */ - void *dummy_pixel_shader; /* shaders */ struct si_shader_ctx_state ps_shader; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 243bdc6e6d7..18b64056bc7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -266,6 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at * Reproducible with Unigine Heaven 4.0 and drirc missing. */ if (blend->dual_src_blend && + sctx->ps_shader.cso && (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3) mask = 0; @@ -697,6 +698,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + rs->rasterizer_discard = state->rasterizer_discard; rs->pa_sc_line_stipple = state->line_stipple_enable ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; @@ -1924,8 +1926,21 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->cb_color_info = color_info; surf->cb_color_attrib = color_attrib; - if (sctx->b.chip_class >= VI) - surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1); + if (sctx->b.chip_class >= VI && rtex->dcc_buffer) { + unsigned max_uncompressed_block_size = 2; + uint64_t dcc_offset = rtex->surface.level[level].dcc_offset; + + if (rtex->surface.nsamples > 1) { + if (rtex->surface.bpe == 1) + max_uncompressed_block_size = 0; + else if (rtex->surface.bpe == 2) + max_uncompressed_block_size = 1; + } + + surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | + S_028C78_INDEPENDENT_64B_BLOCKS(1); + surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8; + } if (rtex->fmask.size) { surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; @@ -2249,6 +2264,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom RADEON_PRIO_CMASK); } + if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, + tex->dcc_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_DCC); + } + radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, sctx->b.chip_class >= VI ? 14 : 13); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -2266,7 +2287,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ if (sctx->b.chip_class >= VI) - radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ + radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */ } /* set CB_COLOR1_INFO for possible dual-src blending */ if (i == 1 && state->cbufs[0] && @@ -2633,8 +2654,18 @@ si_create_sampler_view_custom(struct pipe_context *ctx, view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | S_008F24_LAST_ARRAY(last_layer)); - view->state[6] = 0; - view->state[7] = 0; + + if (tmp->dcc_buffer) { + uint64_t dcc_offset = surflevel[base_level].dcc_offset; + unsigned swap = r600_translate_colorswap(pipe_format); + + view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1); + view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8; + view->dcc_buffer = tmp->dcc_buffer; + } else { + view->state[6] = 0; + view->state[7] = 0; + } /* Initialize the sampler view for FMASK. */ if (tmp->fmask.size) { @@ -3262,7 +3293,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ - si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); + si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); @@ -3336,6 +3367,7 @@ static void si_init_config(struct si_context *sctx) break; case CHIP_KABINI: case CHIP_MULLINS: + case CHIP_STONEY: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; @@ -3406,7 +3438,8 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.chip_class >= VI) { si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1)); + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | + S_028424_OVERWRITE_COMBINER_WATERMARK(4)); si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fba6619d2fd..8b9a311cd3f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -61,6 +61,7 @@ struct si_state_rasterizer { bool poly_smooth; bool uses_poly_offset; bool clamp_fragment_color; + bool rasterizer_discard; }; struct si_dsa_stencil_ref_part { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index ce6c98c3124..cf0891a2ab7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -223,6 +223,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned prim = info->mode; unsigned primgroup_size = 128; /* recommended without a GS */ + unsigned max_primgroup_in_wave = 2; /* SWITCH_ON_EOP(0) is always preferable. */ bool wd_switch_on_eop = false; @@ -246,13 +247,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* primgroup_size must be set to a multiple of NUM_PATCHES */ primgroup_size = (primgroup_size / num_patches) * num_patches; - /* SWITCH_ON_EOI must be set if PrimID is used. - * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ + /* SWITCH_ON_EOI must be set if PrimID is used. */ if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) || - sctx->tes_shader.cso->info.uses_primid) { + sctx->tes_shader.cso->info.uses_primid) ia_switch_on_eoi = true; - partial_es_wave = true; - } /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */ if ((sctx->b.family == CHIP_TAHITI || @@ -269,10 +267,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, wd_switch_on_eop = true; } - if (sctx->b.streamout.streamout_enabled || - sctx->b.streamout.prims_gen_query_enabled) - partial_vs_wave = true; - if (sctx->b.chip_class >= CIK) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. @@ -282,7 +276,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, prim == PIPE_PRIM_LINE_LOOP || prim == PIPE_PRIM_TRIANGLE_FAN || prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY || - info->primitive_restart) + info->primitive_restart || + info->count_from_stream_output) wd_switch_on_eop = true; /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. @@ -292,14 +287,34 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, (info->indirect || info->instance_count > 1)) wd_switch_on_eop = true; - /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */ - if (info->count_from_stream_output) - wd_switch_on_eop = true; + /* Required on CIK and later. */ + if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop) + ia_switch_on_eoi = true; + + /* Required by Hawaii and, for some special cases, by VI. */ + if (ia_switch_on_eoi && + (sctx->b.family == CHIP_HAWAII || + (sctx->b.chip_class == VI && + (sctx->gs_shader.cso || max_primgroup_in_wave != 2)))) + partial_vs_wave = true; + + /* Instancing bug on Bonaire. */ + if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi && + (info->indirect || info->instance_count > 1)) + partial_vs_wave = true; /* If the WD switch is false, the IA switch must be false too. */ assert(wd_switch_on_eop || !ia_switch_on_eop); } + /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ + if (ia_switch_on_eoi) + partial_es_wave = true; + + /* GS requirement. */ + if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) + partial_es_wave = true; + /* Hw bug with single-primitive instances and SWITCH_ON_EOI * on multi-SE chips. */ if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi && @@ -308,18 +323,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, u_prims_for_vertices(info->mode, info->count) <= 1))) sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; - /* Instancing bug on 2 SE chips. */ - if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi && - (info->indirect || info->instance_count > 1)) - partial_vs_wave = true; - return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) | - S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0); + S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? + max_primgroup_in_wave : 0); } static unsigned si_get_ls_hs_config(struct si_context *sctx, @@ -636,6 +647,17 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom) S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); + + /* Necessary for DCC */ + if (sctx->chip_class >= VI) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | + EVENT_INDEX(5)); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + } } if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) { cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | @@ -728,6 +750,7 @@ static void si_get_draw_start_count(struct si_context *sctx, void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_index_buffer ib = {}; unsigned mask; @@ -735,7 +758,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) (info->indexed || !info->count_from_stream_output)) return; - if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) { + if (!sctx->vs_shader.cso) { + assert(0); + return; + } + if (!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard)) { assert(0); return; } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index eea00e0fafc..4a3a04caa52 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -799,11 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - if (sctx->vs_shader.cso == sel || !sel) + if (sctx->vs_shader.cso == sel) return; sctx->vs_shader.cso = sel; - sctx->vs_shader.current = sel->first_variant; + sctx->vs_shader.current = sel ? sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); si_update_viewports_and_scissors(sctx); } @@ -864,16 +864,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_update_viewports_and_scissors(sctx); } -static void si_make_dummy_ps(struct si_context *sctx) -{ - if (!sctx->dummy_pixel_shader) { - sctx->dummy_pixel_shader = - util_make_fragment_cloneinput_shader(&sctx->b.b, 0, - TGSI_SEMANTIC_GENERIC, - TGSI_INTERPOLATE_CONSTANT); - } -} - static void si_bind_ps_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -883,14 +873,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) if (sctx->ps_shader.cso == sel) return; - /* use a dummy shader if binding a NULL shader */ - if (!sel) { - si_make_dummy_ps(sctx); - sel = sctx->dummy_pixel_shader; - } - sctx->ps_shader.cso = sel; - sctx->ps_shader.current = sel->first_variant; + sctx->ps_shader.current = sel ? sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->cb_target_mask); } @@ -956,13 +940,15 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); - struct tgsi_shader_info *psinfo = &ps->selector->info; + struct tgsi_shader_info *psinfo; struct tgsi_shader_info *vsinfo = &vs->selector->info; unsigned i, j, tmp, num_written = 0; - if (!ps->nparam) + if (!ps || !ps->nparam) return; + psinfo = &ps->selector->info; + radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam); for (i = 0; i < psinfo->num_inputs; i++) { @@ -1025,7 +1011,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; - unsigned input_ena = ps->spi_ps_input_ena; + unsigned input_ena; + + if (!ps) + return; + + input_ena = ps->spi_ps_input_ena; /* we need to enable at least one of them, otherwise we hang the GPU */ assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || @@ -1531,23 +1522,38 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); - r = si_shader_select(ctx, &sctx->ps_shader); - if (r) - return false; - si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); - - if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || - sctx->sprite_coord_enable != rs->sprite_coord_enable || - sctx->flatshade != rs->flatshade) { - sctx->sprite_coord_enable = rs->sprite_coord_enable; - sctx->flatshade = rs->flatshade; - si_mark_atom_dirty(sctx, &sctx->spi_map); - } + if (sctx->ps_shader.cso) { + r = si_shader_select(ctx, &sctx->ps_shader); + if (r) + return false; + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); + + if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || + sctx->sprite_coord_enable != rs->sprite_coord_enable || + sctx->flatshade != rs->flatshade) { + sctx->sprite_coord_enable = rs->sprite_coord_enable; + sctx->flatshade = rs->flatshade; + si_mark_atom_dirty(sctx, &sctx->spi_map); + } + + if (si_pm4_state_changed(sctx, ps) || + sctx->force_persample_interp != rs->force_persample_interp) { + sctx->force_persample_interp = rs->force_persample_interp; + si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + } + + if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { + sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; + si_mark_atom_dirty(sctx, &sctx->db_render_state); + } + + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; + si_mark_atom_dirty(sctx, &sctx->msaa_config); - if (si_pm4_state_changed(sctx, ps) || - sctx->force_persample_interp != rs->force_persample_interp) { - sctx->force_persample_interp = rs->force_persample_interp; - si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + if (sctx->b.chip_class == SI) + si_mark_atom_dirty(sctx, &sctx->db_render_state); + } } if (si_pm4_state_changed(sctx, ls) || @@ -1559,19 +1565,6 @@ bool si_update_shaders(struct si_context *sctx) if (!si_update_spi_tmpring_size(sctx)) return false; } - - if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { - sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; - si_mark_atom_dirty(sctx, &sctx->db_render_state); - } - - if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; - si_mark_atom_dirty(sctx, &sctx->msaa_config); - - if (sctx->b.chip_class == SI) - si_mark_atom_dirty(sctx, &sctx->db_render_state); - } return true; } |