summary refs log tree commit diff stats
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/cik_sdma.c 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c 11
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_dma.c 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 33
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 47
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 67
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 95
10 files changed, 182 insertions, 92 deletions
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 6454b8ce8c0..e53af1dd6b5 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -242,7 +242,8 @@ void cik_sdma_copy(struct pipe_context *ctx,
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
- (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
+ (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
+ rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 082ea850675..fce014a1e6b 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
assert(view);
tex = (struct r600_texture *)view->texture;
- assert(tex->cmask.size || tex->fmask.size);
+ assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer);
si_blit_decompress_color(&sctx->b.b, tex,
view->u.tex.first_level, view->u.tex.last_level,
@@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
si_blit_decompress_depth_in_place(sctx, rtex, true,
level, level,
first_layer, last_layer);
- } else if (rtex->fmask.size || rtex->cmask.size) {
+ } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) {
si_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
}
@@ -507,7 +507,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
util_blitter_default_src_texture(&src_templ, src, src_level);
- if (util_format_is_compressed(src->format) &&
+ if (util_format_is_compressed(src->format) ||
util_format_is_compressed(dst->format)) {
unsigned blocksize = util_format_get_blocksize(src->format);
@@ -536,7 +536,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
src_force_level = src_level;
} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
/* also *8_SNORM has precision issues, use UNORM instead */
- util_format_is_snorm(src->format)) {
+ util_format_is_snorm8(src->format)) {
if (util_format_is_subsampled_422(src->format)) {
src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
@@ -675,7 +675,8 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
info->src.box.depth == 1 &&
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
!(dst->surface.flags & RADEON_SURF_SCANOUT) &&
- (!dst->cmask.size || !dst->dirty_level_mask) /* dst cannot be fast-cleared */) {
+ (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */
+ !dst->dcc_buffer) {
si_blitter_begin(ctx, SI_COLOR_RESOLVE |
(info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 13738da5e2c..a8ff6f27319 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -181,6 +181,11 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
rview->resource, RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->resource));
+ if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ rview->dcc_buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_DCC);
+
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
views->desc.enabled_mask |= 1llu << slot;
@@ -229,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
} else {
samplers->depth_texture_mask &= ~(1 << slot);
}
- if (rtex->cmask.size || rtex->fmask.size) {
+ if (rtex->cmask.size || rtex->fmask.size ||
+ (rtex->dcc_buffer && rtex->dirty_level_mask)) {
samplers->compressed_colortex_mask |= 1 << slot;
} else {
samplers->compressed_colortex_mask &= ~(1 << slot);
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 31b0b41e5a4..581e89f42d8 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -248,7 +248,8 @@ void si_dma_copy(struct pipe_context *ctx,
if (src->format != dst->format || src_box->depth > 1 ||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
- rsrc->cmask.size || rsrc->fmask.size) {
+ rsrc->cmask.size || rsrc->fmask.size ||
+ rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5f910c95ef3..60baad3d13c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -55,8 +55,6 @@ static void si_destroy_context(struct pipe_context *context)
if (sctx->pstipple_sampler_state)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
- if (sctx->dummy_pixel_shader)
- sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
if (sctx->fixed_func_tcs_shader.cso)
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
if (sctx->custom_dsa_flush)
@@ -300,6 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -578,6 +577,33 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen)
return true;
}
+static bool si_init_gs_info(struct si_screen *sscreen)
+{
+ switch (sscreen->b.family) {
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ case CHIP_ICELAND:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ sscreen->gs_table_depth = 16;
+ return true;
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ sscreen->gs_table_depth = 32;
+ return true;
+ default:
+ return false;
+ }
+}
+
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
@@ -595,7 +621,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_initialize_pipe_config(sscreen)) {
+ !si_initialize_pipe_config(sscreen) ||
+ !si_init_gs_info(sscreen)) {
FREE(sscreen);
return NULL;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d7a2282952a..42cd8803c36 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -42,6 +42,7 @@
#define SI_BASE_VERTEX_UNKNOWN INT_MIN
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
+#define SI_GS_PER_ES 128
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -85,6 +86,7 @@ struct si_compute;
struct si_screen {
struct r600_common_screen b;
+ unsigned gs_table_depth;
};
struct si_blend_color {
@@ -96,6 +98,7 @@ struct si_sampler_view {
struct pipe_sampler_view base;
struct list_head list;
struct r600_resource *resource;
+ struct r600_resource *dcc_buffer;
/* [0..7] = image descriptor
* [4..7] = buffer descriptor */
uint32_t state[8];
@@ -203,9 +206,6 @@ struct si_context {
struct si_pm4_state *init_config;
bool init_config_has_vgt_flush;
struct si_pm4_state *vgt_shader_config[4];
- /* With rasterizer discard, there doesn't have to be a pixel shader.
- * In that case, we bind this one: */
- void *dummy_pixel_shader;
/* shaders */
struct si_shader_ctx_state ps_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 243bdc6e6d7..18b64056bc7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -266,6 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
* Reproducible with Unigine Heaven 4.0 and drirc missing.
*/
if (blend->dual_src_blend &&
+ sctx->ps_shader.cso &&
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
mask = 0;
@@ -697,6 +698,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->clamp_fragment_color = state->clamp_fragment_color;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
+ rs->rasterizer_discard = state->rasterizer_discard;
rs->pa_sc_line_stipple = state->line_stipple_enable ?
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
@@ -1924,8 +1926,21 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
- if (sctx->b.chip_class >= VI)
- surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+ if (sctx->b.chip_class >= VI && rtex->dcc_buffer) {
+ unsigned max_uncompressed_block_size = 2;
+ uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;
+
+ if (rtex->surface.nsamples > 1) {
+ if (rtex->surface.bpe == 1)
+ max_uncompressed_block_size = 0;
+ else if (rtex->surface.bpe == 2)
+ max_uncompressed_block_size = 1;
+ }
+
+ surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
+ S_028C78_INDEPENDENT_64B_BLOCKS(1);
+ surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8;
+ }
if (rtex->fmask.size) {
surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
@@ -2249,6 +2264,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
RADEON_PRIO_CMASK);
}
+ if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ tex->dcc_buffer, RADEON_USAGE_READWRITE,
+ RADEON_PRIO_DCC);
+ }
+
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
@@ -2266,7 +2287,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
if (sctx->b.chip_class >= VI)
- radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
+ radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
@@ -2633,8 +2654,18 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
S_008F24_LAST_ARRAY(last_layer));
- view->state[6] = 0;
- view->state[7] = 0;
+
+ if (tmp->dcc_buffer) {
+ uint64_t dcc_offset = surflevel[base_level].dcc_offset;
+ unsigned swap = r600_translate_colorswap(pipe_format);
+
+ view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
+ view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8;
+ view->dcc_buffer = tmp->dcc_buffer;
+ } else {
+ view->state[6] = 0;
+ view->state[7] = 0;
+ }
/* Initialize the sampler view for FMASK. */
if (tmp->fmask.size) {
@@ -3262,7 +3293,7 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
- si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
+ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
@@ -3336,6 +3367,7 @@ static void si_init_config(struct si_context *sctx)
break;
case CHIP_KABINI:
case CHIP_MULLINS:
+ case CHIP_STONEY:
raster_config = 0x00000000;
raster_config_1 = 0x00000000;
break;
@@ -3406,7 +3438,8 @@ static void si_init_config(struct si_context *sctx)
if (sctx->b.chip_class >= VI) {
si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
- S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
+ S_028424_OVERWRITE_COMBINER_WATERMARK(4));
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index fba6619d2fd..8b9a311cd3f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -61,6 +61,7 @@ struct si_state_rasterizer {
bool poly_smooth;
bool uses_poly_offset;
bool clamp_fragment_color;
+ bool rasterizer_discard;
};
struct si_dsa_stencil_ref_part {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ce6c98c3124..cf0891a2ab7 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -223,6 +223,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned prim = info->mode;
unsigned primgroup_size = 128; /* recommended without a GS */
+ unsigned max_primgroup_in_wave = 2;
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
@@ -246,13 +247,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* primgroup_size must be set to a multiple of NUM_PATCHES */
primgroup_size = (primgroup_size / num_patches) * num_patches;
- /* SWITCH_ON_EOI must be set if PrimID is used.
- * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ /* SWITCH_ON_EOI must be set if PrimID is used. */
if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
- sctx->tes_shader.cso->info.uses_primid) {
+ sctx->tes_shader.cso->info.uses_primid)
ia_switch_on_eoi = true;
- partial_es_wave = true;
- }
/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
if ((sctx->b.family == CHIP_TAHITI ||
@@ -269,10 +267,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
wd_switch_on_eop = true;
}
- if (sctx->b.streamout.streamout_enabled ||
- sctx->b.streamout.prims_gen_query_enabled)
- partial_vs_wave = true;
-
if (sctx->b.chip_class >= CIK) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
@@ -282,7 +276,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
prim == PIPE_PRIM_LINE_LOOP ||
prim == PIPE_PRIM_TRIANGLE_FAN ||
prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY ||
- info->primitive_restart)
+ info->primitive_restart ||
+ info->count_from_stream_output)
wd_switch_on_eop = true;
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
@@ -292,14 +287,34 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
(info->indirect || info->instance_count > 1))
wd_switch_on_eop = true;
- /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */
- if (info->count_from_stream_output)
- wd_switch_on_eop = true;
+ /* Required on CIK and later. */
+ if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
+ ia_switch_on_eoi = true;
+
+ /* Required by Hawaii and, for some special cases, by VI. */
+ if (ia_switch_on_eoi &&
+ (sctx->b.family == CHIP_HAWAII ||
+ (sctx->b.chip_class == VI &&
+ (sctx->gs_shader.cso || max_primgroup_in_wave != 2))))
+ partial_vs_wave = true;
+
+ /* Instancing bug on Bonaire. */
+ if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi &&
+ (info->indirect || info->instance_count > 1))
+ partial_vs_wave = true;
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
+ /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if (ia_switch_on_eoi)
+ partial_es_wave = true;
+
+ /* GS requirement. */
+ if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
+ partial_es_wave = true;
+
/* Hw bug with single-primitive instances and SWITCH_ON_EOI
* on multi-SE chips. */
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
@@ -308,18 +323,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
u_prims_for_vertices(info->mode, info->count) <= 1)))
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
- /* Instancing bug on 2 SE chips. */
- if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi &&
- (info->indirect || info->instance_count > 1))
- partial_vs_wave = true;
-
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
- S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ?
+ max_primgroup_in_wave : 0);
}
static unsigned si_get_ls_hs_config(struct si_context *sctx,
@@ -636,6 +647,17 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
S_0085F0_CB5_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
+
+ /* Necessary for DCC */
+ if (sctx->chip_class >= VI) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute);
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
+ EVENT_INDEX(5));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ }
}
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
@@ -728,6 +750,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_index_buffer ib = {};
unsigned mask;
@@ -735,7 +758,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
- if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
+ if (!sctx->vs_shader.cso) {
+ assert(0);
+ return;
+ }
+ if (!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard)) {
assert(0);
return;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index eea00e0fafc..4a3a04caa52 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -799,11 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- if (sctx->vs_shader.cso == sel || !sel)
+ if (sctx->vs_shader.cso == sel)
return;
sctx->vs_shader.cso = sel;
- sctx->vs_shader.current = sel->first_variant;
+ sctx->vs_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
si_update_viewports_and_scissors(sctx);
}
@@ -864,16 +864,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_update_viewports_and_scissors(sctx);
}
-static void si_make_dummy_ps(struct si_context *sctx)
-{
- if (!sctx->dummy_pixel_shader) {
- sctx->dummy_pixel_shader =
- util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
- TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT);
- }
-}
-
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -883,14 +873,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
if (sctx->ps_shader.cso == sel)
return;
- /* use a dummy shader if binding a NULL shader */
- if (!sel) {
- si_make_dummy_ps(sctx);
- sel = sctx->dummy_pixel_shader;
- }
-
sctx->ps_shader.cso = sel;
- sctx->ps_shader.current = sel->first_variant;
+ sctx->ps_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
@@ -956,13 +940,15 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
- struct tgsi_shader_info *psinfo = &ps->selector->info;
+ struct tgsi_shader_info *psinfo;
struct tgsi_shader_info *vsinfo = &vs->selector->info;
unsigned i, j, tmp, num_written = 0;
- if (!ps->nparam)
+ if (!ps || !ps->nparam)
return;
+ psinfo = &ps->selector->info;
+
radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
for (i = 0; i < psinfo->num_inputs; i++) {
@@ -1025,7 +1011,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
- unsigned input_ena = ps->spi_ps_input_ena;
+ unsigned input_ena;
+
+ if (!ps)
+ return;
+
+ input_ena = ps->spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
@@ -1531,23 +1522,38 @@ bool si_update_shaders(struct si_context *sctx)
si_update_vgt_shader_config(sctx);
- r = si_shader_select(ctx, &sctx->ps_shader);
- if (r)
- return false;
- si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
-
- if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
- sctx->sprite_coord_enable != rs->sprite_coord_enable ||
- sctx->flatshade != rs->flatshade) {
- sctx->sprite_coord_enable = rs->sprite_coord_enable;
- sctx->flatshade = rs->flatshade;
- si_mark_atom_dirty(sctx, &sctx->spi_map);
- }
+ if (sctx->ps_shader.cso) {
+ r = si_shader_select(ctx, &sctx->ps_shader);
+ if (r)
+ return false;
+ si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
+
+ if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
+ sctx->sprite_coord_enable != rs->sprite_coord_enable ||
+ sctx->flatshade != rs->flatshade) {
+ sctx->sprite_coord_enable = rs->sprite_coord_enable;
+ sctx->flatshade = rs->flatshade;
+ si_mark_atom_dirty(sctx, &sctx->spi_map);
+ }
+
+ if (si_pm4_state_changed(sctx, ps) ||
+ sctx->force_persample_interp != rs->force_persample_interp) {
+ sctx->force_persample_interp = rs->force_persample_interp;
+ si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ }
+
+ if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
+ sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
+
+ if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
+ sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
- if (si_pm4_state_changed(sctx, ps) ||
- sctx->force_persample_interp != rs->force_persample_interp) {
- sctx->force_persample_interp = rs->force_persample_interp;
- si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ if (sctx->b.chip_class == SI)
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
}
if (si_pm4_state_changed(sctx, ls) ||
@@ -1559,19 +1565,6 @@ bool si_update_shaders(struct si_context *sctx)
if (!si_update_spi_tmpring_size(sctx))
return false;
}
-
- if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
- sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
- si_mark_atom_dirty(sctx, &sctx->db_render_state);
- }
-
- if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
- sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
- si_mark_atom_dirty(sctx, &sctx->msaa_config);
-
- if (sctx->b.chip_class == SI)
- si_mark_atom_dirty(sctx, &sctx->db_render_state);
- }
return true;
}