diff options
| | | |
|---|---|---|
| author | Marek Olšák <[email protected]> | 2017-06-09 18:46:07 +0200 |
| committer | Marek Olšák <[email protected]> | 2017-06-27 19:55:09 +0200 |
| commit | 4a10d6154e1a6086e1eecf0e17ab63cc41862ea6 (patch) | |
| tree | d8166d615f4771bc96147185dc12a52e59776c4a /src/gallium/drivers/radeonsi/si_state_shaders.c | |
| parent | aef998fe4b7551faf8a44409aa74554b45d2b67c (diff) | |
radeonsi: move instance divisors into a constant buffer
Shader key size: 107 -> 47
Divisors of 0 and 1 are encoded in the shader key. Greater instance divisors
are loaded from a constant buffer.
The shader code doing the division is huge. Is it something we need to
worry about? Does any app use instance divisors >= 2?
VS prolog disassembly:
s_load_dwordx4 s[12:15], s[0:1], 0x80 ; C00A0300 00000080
s_nop 0 ; BF800000
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s14, s[12:15], 0x4 ; C0220386 00000004
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cvt_f32_u32_e32 v4, s14 ; 7E080C0E
v_rcp_iflag_f32_e32 v4, v4 ; 7E084704
v_mul_f32_e32 v4, 0x4f800000, v4 ; 0A0808FF 4F800000
v_cvt_u32_f32_e32 v4, v4 ; 7E080F04
v_mul_hi_u32 v5, v4, s14 ; D2860005 00001D04
v_mul_lo_i32 v6, v4, s14 ; D2850006 00001D04
v_cmp_eq_u32_e64 s[12:13], 0, v5 ; D0CA000C 00020A80
v_sub_i32_e32 v5, vcc, 0, v6 ; 340A0C80
v_cndmask_b32_e64 v5, v6, v5, s[12:13] ; D1000005 00320B06
v_mul_hi_u32 v5, v5, v4 ; D2860005 00020905
v_add_i32_e32 v6, vcc, v5, v4 ; 320C0905
v_subrev_i32_e32 v4, vcc, v5, v4 ; 36080905
v_cndmask_b32_e64 v4, v4, v6, s[12:13] ; D1000004 00320D04
v_mul_hi_u32 v5, v4, v1 ; D2860005 00020304
v_add_i32_e32 v4, vcc, s8, v0 ; 32080008
v_mul_lo_i32 v6, v5, s14 ; D2850006 00001D05
v_add_i32_e32 v7, vcc, 1, v5 ; 320E0A81
v_cmp_ge_u32_e64 s[12:13], v1, v6 ; D0CE000C 00020D01
v_sub_i32_e32 v6, vcc, v1, v6 ; 340C0D01
v_cmp_le_u32_e32 vcc, s14, v6 ; 7D960C0E
v_cndmask_b32_e64 v8, 0, -1, s[12:13] ; D1000008 00318280
v_cndmask_b32_e64 v6, 0, -1, vcc ; D1000006 01A98280
v_and_b32_e32 v6, v8, v6 ; 260C0D08
v_cmp_eq_u32_e32 vcc, 0, v6 ; 7D940C80
v_cndmask_b32_e32 v6, v7, v5, vcc ; 000C0B07
v_add_i32_e32 v5, vcc, -1, v5 ; 320A0AC1
v_cmp_eq_u32_e32 vcc, 0, v8 ; 7D941080
v_cndmask_b32_e32 v5, v6, v5, vcc ; 000A0B06
v_add_i32_e32 v5, vcc, s9, v5 ; 320A0A09
v2: set prefer_mono for fetched instance divisors
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state_shaders.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4eb3b758b4e..af3f2a90e2a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1187,10 +1187,18 @@ static void si_shader_selector_key_vs(struct si_context *sctx,
 	if (!sctx->vertex_elements)
 		return;
 
+	prolog_key->instance_divisor_is_one =
+		sctx->vertex_elements->instance_divisor_is_one;
+	prolog_key->instance_divisor_is_fetched =
+		sctx->vertex_elements->instance_divisor_is_fetched;
+
+	/* Prefer a monolithic shader to allow scheduling divisions around
+	 * VBO loads. */
+	if (prolog_key->instance_divisor_is_fetched)
+		key->opt.prefer_mono = 1;
+
 	unsigned count = MIN2(vs->info.num_inputs,
 			      sctx->vertex_elements->count);
-	memcpy(prolog_key->instance_divisors,
-	       sctx->vertex_elements->instance_divisors, count * 4);
 	memcpy(key->mono.vs_fix_fetch,
 	       sctx->vertex_elements->fix_fetch, count);
 }