aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-02-15 18:22:27 +0100
committerMarek Olšák <[email protected]>2017-02-18 01:22:08 +0100
commit6b73aafceb1eb8e81754e2f349826994de678466 (patch)
tree8867b645888363ea28c3e1f87ac270ead32403ca /src/gallium
parent620aded541a5b81df74575888754094fea2f2ae2 (diff)
radeonsi: use a clever alignment for constant buffer uploads
This results in a very tiny decrease in lgkm wait cycles. Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h15
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c1
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c1
5 files changed, 21 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 432550dbef4..812c0362afc 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -201,6 +201,7 @@ struct radeon_info {
uint32_t ce_fw_version;
uint32_t vce_harvest_config;
uint32_t clock_crystal_freq;
+ uint32_t tcc_cache_line_size;
/* Kernel info. */
uint32_t drm_major; /* version */
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 8f636af96aa..72b33f3e8e0 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1047,7 +1047,9 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
{
void *tmp;
- u_upload_alloc(sctx->b.b.stream_uploader, 0, size, 256, const_offset,
+ u_upload_alloc(sctx->b.b.stream_uploader, 0, size,
+ si_optimal_tcc_alignment(sctx, size),
+ const_offset,
(struct pipe_resource**)rbuffer, &tmp);
if (*rbuffer)
util_memcpy_cpu_to_le32(tmp, ptr, size);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index fb24babe61f..bee6881d096 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -512,4 +512,19 @@ static inline bool si_vs_exports_prim_id(struct si_shader *shader)
return false;
}
+static inline unsigned
+si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
+{
+ unsigned alignment, tcc_cache_line_size;
+
+ /* If the upload size is less than the cache line size (e.g. 16, 32),
+ * the whole thing will fit into a cache line if we align it to its size.
+ * The idea is that multiple small uploads can share a cache line.
+ * If the upload size is greater, align it to the cache line size.
+ */
+ alignment = util_next_power_of_two(upload_size);
+ tcc_cache_line_size = sctx->screen->b.info.tcc_cache_line_size;
+ return MIN2(alignment, tcc_cache_line_size);
+}
+
#endif
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index db0087c094e..6511c4855d8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -345,6 +345,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
ws->info.has_userptr = true;
ws->info.num_render_backends = ws->amdinfo.rb_pipes;
ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+ ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
ws->info.has_virtual_memory = true;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 6c077ea0abc..a39a7bed5f4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -524,6 +524,7 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
(ws->info.family == CHIP_HAWAII &&
ws->accel_working2 < 3);
+ ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL;