summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeonsi/Makefile.sources1
-rw-r--r--src/gallium/drivers/radeonsi/meson.build1
-rw-r--r--src/gallium/drivers/radeonsi/si_clear.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_cp_dma.c12
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c7
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h11
-rw-r--r--src/gallium/drivers/radeonsi/si_test_clearbuffer.c140
-rw-r--r--src/gallium/drivers/radeonsi/si_test_dma.c3
8 files changed, 170 insertions, 11 deletions
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index f760b5b7a69..c052e8dbeb3 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -43,6 +43,7 @@ C_SOURCES := \
si_state_streamout.c \
si_state_viewport.c \
si_state.h \
+ si_test_clearbuffer.c \
si_test_dma.c \
si_texture.c \
si_uvd.c \
diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build
index 90498398f38..9e249adc61e 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -59,6 +59,7 @@ files_libradeonsi = files(
'si_state_shaders.c',
'si_state_streamout.c',
'si_state_viewport.c',
+ 'si_test_clearbuffer.c',
'si_test_dma.c',
'si_texture.c',
'si_uvd.c',
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 4e07de81bac..654ff0ace78 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -256,7 +256,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
}
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
- clear_value, SI_COHERENCY_CB_META);
+ clear_value, SI_COHERENCY_CB_META, SI_METHOD_BEST);
}
/* Set the same micro tile mode as the destination of the last MSAA resolve.
@@ -489,7 +489,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
tex->cmask_offset, tex->surface.cmask_size,
- 0xCCCCCCCC, SI_COHERENCY_CB_META);
+ 0xCCCCCCCC, SI_COHERENCY_CB_META, SI_METHOD_BEST);
need_decompress_pass = true;
}
@@ -520,7 +520,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
/* Do the fast clear. */
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
tex->cmask_offset, tex->surface.cmask_size, 0,
- SI_COHERENCY_CB_META);
+ SI_COHERENCY_CB_META, SI_METHOD_BEST);
need_decompress_pass = true;
}
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index f98fad43b3e..b0133323590 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -215,7 +215,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
- enum si_coherency coher)
+ enum si_coherency coher, enum si_method xfer )
{
struct radeon_winsys *ws = sctx->ws;
struct r600_resource *rdst = r600_resource(dst);
@@ -227,7 +227,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (!size)
return;
- dma_clear_size = size & ~3ull;
+ dma_clear_size = size & ~3ull;
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
@@ -250,7 +250,9 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
* For example, DeusEx:MD has 21 buffer clears per frame and all
* of them are moved to SDMA thanks to this. */
!ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
- RADEON_USAGE_READWRITE))) {
+ RADEON_USAGE_READWRITE)) &&
+ /* Skip the SDMA path when the caller explicitly requests CP DMA via xfer. */
+ (xfer != SI_METHOD_CP_DMA)) {
sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
offset += dma_clear_size;
@@ -263,7 +265,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
/* Flush the caches. */
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
+ SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
while (dma_clear_size) {
unsigned byte_count = MIN2(dma_clear_size, cp_dma_max_byte_count(sctx));
@@ -356,7 +358,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
}
si_clear_buffer(sctx, dst, offset, size, dword_value,
- SI_COHERENCY_SHADER);
+ SI_COHERENCY_SHADER, SI_METHOD_BEST);
}
/**
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index cc05d2f8de3..e9cf1c32724 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -101,6 +101,7 @@ static const struct debug_named_value debug_options[] = {
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
+ { "testclearbufperf", DBG(TEST_CLEARBUF_PERF), "Test Clearbuffer Performance" },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -545,7 +546,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
/* Clear the NULL constant buffer, because loads should return zeros. */
si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
sctx->null_const_buf.buffer->width0, 0,
- SI_COHERENCY_SHADER);
+ SI_COHERENCY_SHADER, SI_METHOD_BEST);
}
uint64_t max_threads_per_block;
@@ -1069,6 +1070,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
if (sscreen->debug_flags & DBG(TEST_DMA))
si_test_dma(sscreen);
+ if (sscreen->debug_flags & DBG(TEST_CLEARBUF_PERF)) {
+ si_test_clearbuffer(sscreen);
+ }
+
if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
DBG(TEST_VMFAULT_SDMA) |
DBG(TEST_VMFAULT_SHADER)))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 9ab79bcaa2e..7bfc9f5da1a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -165,6 +165,7 @@ enum {
DBG_TEST_VMFAULT_CP,
DBG_TEST_VMFAULT_SDMA,
DBG_TEST_VMFAULT_SHADER,
+ DBG_TEST_CLEARBUF_PERF,
};
#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
@@ -1110,10 +1111,15 @@ enum si_coherency {
SI_COHERENCY_CB_META,
};
+enum si_method { /* Transfer method requested from si_clear_buffer via its xfer parameter. */
+ SI_METHOD_CP_DMA, /* si_clear_buffer skips its SDMA path when this is passed */
+ SI_METHOD_BEST, /* does not exclude the SDMA path in si_clear_buffer */
+};
+
void si_cp_dma_wait_for_idle(struct si_context *sctx);
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
- enum si_coherency coher);
+ enum si_coherency coher, enum si_method xfer);
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size,
@@ -1199,6 +1205,9 @@ void si_resume_queries(struct si_context *sctx);
/* si_test_dma.c */
void si_test_dma(struct si_screen *sscreen);
+/* si_test_clearbuffer.c */
+void si_test_clearbuffer(struct si_screen *sscreen);
+
/* si_uvd.c */
struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ);
diff --git a/src/gallium/drivers/radeonsi/si_test_clearbuffer.c b/src/gallium/drivers/radeonsi/si_test_clearbuffer.c
new file mode 100644
index 00000000000..00fbd2d043a
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_test_clearbuffer.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* This file implements performance tests for the si_clear_buffer function. */
+
+#include "si_pipe.h"
+
+#define CLEARBUF_MIN 32 /* first buffer size (bytes) of the size sweep; doubled on every step */
+#define CLEARBUF_COUNT 16 /* iterations of the averaging test and number of sizes in the sweep */
+#define CLEARBUF_MEMSZ 1024 /* buffer size (bytes) cleared repeatedly by the averaging test */
+
+/**
+ * Measure the time taken by one CP DMA clear of a freshly created buffer.
+ *
+ * A buffer of memory_size bytes is created, cleared to zero with
+ * si_clear_buffer(..., SI_METHOD_CP_DMA) between begin_query/end_query of a
+ * PIPE_QUERY_TIME_ELAPSED query, and released again.
+ *
+ * \param ctx          context used for the query and the clear
+ * \param memory_size  size in bytes of the buffer to create and clear
+ * \return the 64-bit query result (the callers treat it as nanoseconds),
+ *         or 0 if the buffer or the query could not be created or the
+ *         query result could not be read
+ */
+static uint64_t
+measure_clearbuf_time(struct pipe_context *ctx,
+		      uint64_t memory_size)
+{
+	struct si_context *sctx = (struct si_context*)ctx;
+	struct pipe_screen *screen = ctx->screen;
+	union pipe_query_result qresult;
+	struct pipe_query *query_te;
+	struct pipe_resource *buf;
+	uint64_t elapsed = 0;
+
+	buf = pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, memory_size);
+	if (!buf)
+		return 0;
+
+	query_te = ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
+	if (!query_te) {
+		pipe_resource_reference(&buf, NULL);
+		return 0;
+	}
+
+	ctx->begin_query(ctx, query_te);
+	/* operation */
+	si_clear_buffer(sctx, buf, 0, memory_size, 0x00,
+			SI_COHERENCY_SHADER, SI_METHOD_CP_DMA);
+	ctx->end_query(ctx, query_te);
+
+	/* Only trust qresult if the driver reports that it filled it in. */
+	if (ctx->get_query_result(ctx, query_te, true, &qresult))
+		elapsed = qresult.u64;
+
+	/* Cleanup. */
+	ctx->destroy_query(ctx, query_te);
+	pipe_resource_reference(&buf, NULL);
+
+	/* Report Results */
+	return elapsed;
+}
+
+/**
+ * @brief Analyze the rate of clearing a CLEARBUF_MEMSZ-byte buffer, averaged
+ *        over CLEARBUF_COUNT iterations, and print it in kB/s.
+ *
+ * Prints "n/a" instead of a rate when no elapsed time was measured, so the
+ * rate computation never divides by zero.
+ *
+ * @param ctx Context of pipe to perform analysis on
+ */
+static void
+analyze_clearbuf_perf_avg(struct pipe_context *ctx)
+{
+	uint64_t total_ns = 0;
+	uint64_t rate_kBps;
+	unsigned i;
+
+	/* Run Tests. */
+	for (i = 0; i < CLEARBUF_COUNT; i++)
+		total_ns += measure_clearbuf_time(ctx, CLEARBUF_MEMSZ);
+
+	/* Display Results. */
+	/* Cast explicitly so the arguments match the conversion specifiers
+	 * on every ABI. */
+	printf("CP DMA clear_buffer performance (buffer %llu ,repeat %u ):",
+	       (unsigned long long)CLEARBUF_MEMSZ,
+	       (unsigned)CLEARBUF_COUNT);
+
+	if (!total_ns) {
+		printf(" n/a\n");
+		return;
+	}
+
+	/* Calculate Results. */
+	/* kBps = (size(bytes))/(1000) / (time(ns)/(1000*1000*1000)) */
+	rate_kBps = (uint64_t)CLEARBUF_COUNT * CLEARBUF_MEMSZ;
+	rate_kBps *= 1000 * 1000;
+	rate_kBps /= total_ns;
+
+	printf(" %llu kB/s\n", (unsigned long long)rate_kBps);
+}
+
+/**
+ * @brief Analyze the rate of clearing a range of buffer sizes and print each
+ *        rate in kB/s.
+ *
+ * CLEARBUF_COUNT sizes are measured, starting at CLEARBUF_MIN bytes and
+ * doubling on every step. All measurements are taken before anything is
+ * printed. A size for which no elapsed time was measured is reported as
+ * "n/a" so the rate computation never divides by zero.
+ *
+ * @param ctx Context of pipe to perform analysis on
+ */
+static void
+analyze_clearbuf_perf_rng(struct pipe_context *ctx)
+{
+	uint64_t elapsed_ns[CLEARBUF_COUNT];
+	uint64_t mem_size;
+	uint64_t rate_kBps;
+	unsigned i;
+
+	/* Run Tests. */
+	mem_size = CLEARBUF_MIN;
+	for (i = 0; i < CLEARBUF_COUNT; i++) {
+		elapsed_ns[i] = measure_clearbuf_time(ctx, mem_size);
+		mem_size <<= 1;
+	}
+
+	/* Calculate & Display Results. */
+	mem_size = CLEARBUF_MIN;
+	for (i = 0; i < CLEARBUF_COUNT; i++) {
+		/* Cast explicitly so the arguments match the conversion
+		 * specifiers on every ABI. */
+		printf("CP DMA clear_buffer performance (buffer %llu):",
+		       (unsigned long long)mem_size);
+
+		if (elapsed_ns[i]) {
+			/* kBps = (size(bytes))/(1000) / (time(ns)/(1000*1000*1000)) */
+			rate_kBps = mem_size;
+			rate_kBps *= 1000 * 1000;
+			rate_kBps /= elapsed_ns[i];
+			printf(" %llu kB/s\n", (unsigned long long)rate_kBps);
+		} else {
+			printf(" n/a\n");
+		}
+
+		mem_size <<= 1;
+	}
+}
+
+/**
+ * Run the clear-buffer performance tests and terminate the process.
+ *
+ * Creates a context on the given screen, runs the averaging test and the
+ * size-sweep test, destroys the context and exits. This function never
+ * returns: it exits with status 0 after the tests, or with status 1 if the
+ * context could not be created.
+ *
+ * \param sscreen  screen on which the test context is created
+ */
+void si_test_clearbuffer(struct si_screen *sscreen)
+{
+	struct pipe_screen *screen = &sscreen->b;
+	struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
+
+	if (!ctx) {
+		fprintf(stderr, "si_test_clearbuffer: context creation failed\n");
+		exit(1);
+	}
+
+	analyze_clearbuf_perf_avg(ctx);
+	analyze_clearbuf_perf_rng(ctx);
+
+	/* Release the context before leaving instead of relying on exit(). */
+	ctx->destroy(ctx);
+	exit(0);
+}
+
diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium/drivers/radeonsi/si_test_dma.c
index ee6ab7c2cf2..baab580308a 100644
--- a/src/gallium/drivers/radeonsi/si_test_dma.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma.c
@@ -307,7 +307,8 @@ void si_test_dma(struct si_screen *sscreen)
set_random_pixels(ctx, src, &src_cpu);
/* clear dst pixels */
- si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0, true);
+ si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0,
+ true, SI_METHOD_BEST);
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
/* preparation */