summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2018-08-02 20:33:06 -0400
committer Marek Olšák <[email protected]> 2018-08-29 15:31:41 -0400
commit 0c5429cc73f6d1787914fcebb4cb95677c8ebb82 (patch)
tree 272edc977551085b6e8b2ab2da92bc7d83b5bafb /src
parent 8f6e06d1608bff31165511787a06da8c635e6da0 (diff)
radeonsi: add flag L2_STREAM for minimal cache usage
Diffstat (limited to 'src')
-rw-r--r-- src/amd/common/sid.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_cp_dma.c 16
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
3 files changed, 13 insertions, 6 deletions
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 0671f7d3998..d9c4a1a7414 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -294,11 +294,13 @@
#define V_500_GDS 1 /* program SAS to 1 as well */
#define V_500_DATA 2
#define V_500_SRC_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_DST_CACHE_POLICY(x) (((unsigned)(x) & 0x3) << 25) /* CIK+ */
#define S_500_DST_SEL(x) (((unsigned)(x) & 0x3) << 20)
#define V_500_DST_ADDR 0
#define V_500_GDS 1 /* program DAS to 1 as well */
#define V_500_NOWHERE 2 /* new for GFX9 */
#define V_500_DST_ADDR_TC_L2 3 /* new for CIK */
+#define S_500_SRC_CACHE_POLICY(x) (((unsigned)(x) & 0x3) << 13) /* CIK+ */
#define S_500_ENGINE(x) ((x) & 0x1)
#define V_500_ME 0
#define V_500_PFP 1
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index bae592a4f7d..61be22f28b5 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -88,15 +88,19 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
/* Src and dst flags. */
if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
- src_va == dst_va)
+ src_va == dst_va) {
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
- else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
- header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+ } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+ header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) |
+ S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
+ }
- if (flags & CP_DMA_CLEAR)
+ if (flags & CP_DMA_CLEAR) {
header |= S_411_SRC_SEL(V_411_DATA);
- else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS)
- header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+ } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+ header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
+ S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
+ }
if (sctx->chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5fa8c33f6cb..95489f09612 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1111,6 +1111,7 @@ void si_init_clear_functions(struct si_context *sctx);
enum si_cache_policy {
L2_BYPASS,
L2_LRU, /* same as SLC=0 */
+ L2_STREAM, /* same as SLC=1 */
};
enum si_coherency {