summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2014-12-30 18:41:25 +0100
committerMarek Olšák <[email protected]>2015-01-07 12:06:43 +0100
commitd8185aa9a8e3588fe014faef8afaeae56d45e90b (patch)
tree050b638b39485392c466f05ef629505fb155350f /src/gallium/drivers/radeonsi
parent7c9ec6ca7ee30109f0bcf0f3f4bcee6fb30dac81 (diff)
radeonsi: emit SURFACE_SYNC last
This fixes a case where a transform feedback buffer is fed back as an index buffer, because SURFACE_SYNC must be after VS_PARTIAL_FLUSH. Reviewed-by: Michel Dänzer <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c58
1 files changed, 35 insertions, 23 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3703e5f3d58..cd4880bfd2d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -369,6 +369,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
{
struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
uint32_t cp_coher_cntl = 0;
+ uint32_t sqc_caches = 0;
uint32_t compute =
PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
@@ -377,10 +378,9 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
if (sctx->chip_class == SI &&
sctx->flags & BOTH_ICACHE_KCACHE &&
(sctx->flags & BOTH_ICACHE_KCACHE) != BOTH_ICACHE_KCACHE) {
- r600_write_config_reg(cs, R_008C08_SQC_CACHES,
+ sqc_caches =
S_008C08_INST_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_ICACHE)) |
- S_008C08_DATA_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_KCACHE)));
- cs->buf[cs->cdw-3] |= compute; /* set the compute bit in the header */
+ S_008C08_DATA_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_KCACHE));
} else {
if (sctx->flags & SI_CONTEXT_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
@@ -409,24 +409,6 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
S_0085F0_DB_DEST_BASE_ENA(1);
}
- if (cp_coher_cntl) {
- if (sctx->chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- } else {
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- }
- }
-
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
@@ -441,6 +423,12 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
EVENT_WRITE_INV_L2);
}
+ /* FLUSH_AND_INV events must be emitted before PS_PARTIAL_FLUSH.
+ * Otherwise, clearing CMASK (CB meta) with CP DMA isn't reliable.
+ *
+ * I think the reason is that FLUSH_AND_INV is only added to a queue
+ * and it is PS_PARTIAL_FLUSH that waits for it to complete.
+ */
if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
@@ -448,12 +436,10 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
-
if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
}
-
if (sctx->flags & SI_CONTEXT_VGT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
@@ -463,6 +449,32 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
}
+ /* SURFACE_SYNC must be emitted after partial flushes.
+ * It looks like SURFACE_SYNC flushes caches immediately and doesn't
+ * wait for any engines. This should be last.
+ */
+ if (sqc_caches) {
+ r600_write_config_reg(cs, R_008C08_SQC_CACHES, sqc_caches);
+ cs->buf[cs->cdw-3] |= compute; /* set the compute bit in the header */
+ }
+ if (cp_coher_cntl) {
+ if (sctx->chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ }
+ }
+
sctx->flags = 0;
}