summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_state.c
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-06-09 18:46:07 +0200
committerMarek Olšák <[email protected]>2017-06-27 19:55:09 +0200
commit4a10d6154e1a6086e1eecf0e17ab63cc41862ea6 (patch)
treed8166d615f4771bc96147185dc12a52e59776c4a /src/gallium/drivers/radeonsi/si_state.c
parentaef998fe4b7551faf8a44409aa74554b45d2b67c (diff)
radeonsi: move instance divisors into a constant buffer
Shader key size: 107 -> 47 Divisors of 0 and 1 are encoded in the shader key. Greater instance divisors are loaded from a constant buffer. The shader code doing the division is huge. Is it something we need to worry about? Does any app use instance divisors >= 2? VS prolog disassembly: s_load_dwordx4 s[12:15], s[0:1], 0x80 ; C00A0300 00000080 s_nop 0 ; BF800000 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s14, s[12:15], 0x4 ; C0220386 00000004 s_waitcnt lgkmcnt(0) ; BF8C007F v_cvt_f32_u32_e32 v4, s14 ; 7E080C0E v_rcp_iflag_f32_e32 v4, v4 ; 7E084704 v_mul_f32_e32 v4, 0x4f800000, v4 ; 0A0808FF 4F800000 v_cvt_u32_f32_e32 v4, v4 ; 7E080F04 v_mul_hi_u32 v5, v4, s14 ; D2860005 00001D04 v_mul_lo_i32 v6, v4, s14 ; D2850006 00001D04 v_cmp_eq_u32_e64 s[12:13], 0, v5 ; D0CA000C 00020A80 v_sub_i32_e32 v5, vcc, 0, v6 ; 340A0C80 v_cndmask_b32_e64 v5, v6, v5, s[12:13] ; D1000005 00320B06 v_mul_hi_u32 v5, v5, v4 ; D2860005 00020905 v_add_i32_e32 v6, vcc, v5, v4 ; 320C0905 v_subrev_i32_e32 v4, vcc, v5, v4 ; 36080905 v_cndmask_b32_e64 v4, v4, v6, s[12:13] ; D1000004 00320D04 v_mul_hi_u32 v5, v4, v1 ; D2860005 00020304 v_add_i32_e32 v4, vcc, s8, v0 ; 32080008 v_mul_lo_i32 v6, v5, s14 ; D2850006 00001D05 v_add_i32_e32 v7, vcc, 1, v5 ; 320E0A81 v_cmp_ge_u32_e64 s[12:13], v1, v6 ; D0CE000C 00020D01 v_sub_i32_e32 v6, vcc, v1, v6 ; 340C0D01 v_cmp_le_u32_e32 vcc, s14, v6 ; 7D960C0E v_cndmask_b32_e64 v8, 0, -1, s[12:13] ; D1000008 00318280 v_cndmask_b32_e64 v6, 0, -1, vcc ; D1000006 01A98280 v_and_b32_e32 v6, v8, v6 ; 260C0D08 v_cmp_eq_u32_e32 vcc, 0, v6 ; 7D940C80 v_cndmask_b32_e32 v6, v7, v5, vcc ; 000C0B07 v_add_i32_e32 v5, vcc, -1, v5 ; 320A0AC1 v_cmp_eq_u32_e32 vcc, 0, v8 ; 7D941080 v_cndmask_b32_e32 v5, v6, v5, vcc ; 000A0B06 v_add_i32_e32 v5, vcc, s9, v5 ; 320A0A09 v2: set prefer_mono for fetched instance divisors Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index a674a602e3a..7e3d1a02e07 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3773,6 +3773,11 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
if (elements[i].instance_divisor) {
v->uses_instance_divisors = true;
v->instance_divisors[i] = elements[i].instance_divisor;
+
+ if (v->instance_divisors[i] == 1)
+ v->instance_divisor_is_one |= 1u << i;
+ else
+ v->instance_divisor_is_fetched |= 1u << i;
}
if (!used[vbo_index]) {
@@ -3901,6 +3906,16 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
v->uses_instance_divisors || /* we don't check which divisors changed */
memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
sctx->do_update_shaders = true;
+
+ if (v && v->instance_divisor_is_fetched) {
+ struct pipe_constant_buffer cb;
+
+ cb.buffer = NULL;
+ cb.user_buffer = v->instance_divisors;
+ cb.buffer_offset = 0;
+ cb.buffer_size = sizeof(uint32_t) * v->count;
+ si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
+ }
}
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)