diff options
author | Samuel Pitoiset <[email protected]> | 2019-02-12 15:09:32 +0100 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2019-02-14 09:10:56 +0100 |
commit | 4b3549c0846f451119ee883a33205b0caa1cf16e (patch) | |
tree | ab6a66296ebc72150aafb11e61fe3a0377b8ffd2 /src/amd/vulkan | |
parent | 210aec3612bc02a6a02035188937a4755da5a252 (diff) |
radv: reduce the number of loaded channels for vertex input fetches
It's unnecessary to load more channels than the vertex attribute
format. The remaining channels are filled with 0 for y and z,
and 1 for w.
29077 shaders in 15096 tests
Totals:
SGPRS: 1321605 -> 1318869 (-0.21 %)
VGPRS: 935236 -> 932252 (-0.32 %)
Spilled SGPRs: 24860 -> 24776 (-0.34 %)
Code Size: 49832348 -> 49819464 (-0.03 %) bytes
Max Waves: 242101 -> 242611 (0.21 %)
Totals from affected shaders:
SGPRS: 93675 -> 90939 (-2.92 %)
VGPRS: 58016 -> 55032 (-5.14 %)
Spilled SGPRs: 172 -> 88 (-48.84 %)
Code Size: 2862740 -> 2849856 (-0.45 %) bytes
Max Waves: 15474 -> 15984 (3.30 %)
This mostly helps Croteam games (Talos/Sam2017).
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 81 |
1 files changed, 79 insertions, 2 deletions
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index a0ce569d409..08ab64971ab 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2029,6 +2029,72 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
 	return alpha;
 }
 
+static unsigned
+get_num_channels_from_data_format(unsigned data_format)
+{
+	switch (data_format) {
+	case V_008F0C_BUF_DATA_FORMAT_8:
+	case V_008F0C_BUF_DATA_FORMAT_16:
+	case V_008F0C_BUF_DATA_FORMAT_32:
+		return 1;
+	case V_008F0C_BUF_DATA_FORMAT_8_8:
+	case V_008F0C_BUF_DATA_FORMAT_16_16:
+	case V_008F0C_BUF_DATA_FORMAT_32_32:
+		return 2;
+	case V_008F0C_BUF_DATA_FORMAT_10_11_11:
+	case V_008F0C_BUF_DATA_FORMAT_11_11_10:
+	case V_008F0C_BUF_DATA_FORMAT_32_32_32:
+		return 3;
+	case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
+	case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
+	case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
+	case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
+	case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
+		return 4;
+	default:
+		break;
+	}
+
+	return 4;
+}
+
+static LLVMValueRef
+radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
+				LLVMValueRef value,
+				unsigned num_channels,
+				bool is_float)
+{
+	LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
+	LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
+	LLVMTypeRef elemtype;
+	LLVMValueRef chan[4];
+
+	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+
+		if (num_channels == vec_size)
+			return value;
+
+		num_channels = MIN2(num_channels, vec_size);
+
+		for (unsigned i = 0; i < num_channels; i++)
+			chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
+
+		elemtype = LLVMGetElementType(LLVMTypeOf(value));
+	} else {
+		if (num_channels) {
+			assert(num_channels == 1);
+			chan[0] = value;
+		}
+		elemtype = LLVMTypeOf(value);
+	}
+
+	for (unsigned i = num_channels; i < 4; i++)
+		chan[i] = i == 3 ? one : zero;
+
+	return ac_build_gather_values(&ctx->ac, chan, 4);
+}
+
 static void
 handle_vs_input_decl(struct radv_shader_context *ctx,
 		     struct nir_variable *variable)
@@ -2041,7 +2107,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
 	uint8_t input_usage_mask =
 		ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
-	unsigned num_channels = util_last_bit(input_usage_mask);
+	unsigned num_input_channels = util_last_bit(input_usage_mask);
 
 	variable->data.driver_location = variable->data.location * 4;
 
@@ -2049,6 +2115,10 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 	for (unsigned i = 0; i < attrib_count; ++i) {
 		LLVMValueRef output[4];
 		unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
+		unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
+		unsigned data_format = attrib_format & 0x0f;
+		unsigned num_format = (attrib_format >> 4) & 0x07;
+		bool is_float = num_format == V_008F0C_BUF_NUM_FORMAT_FLOAT;
 
 		if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
 			uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
@@ -2080,12 +2150,19 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 
 		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
+		/* Adjust the number of channels to load based on the vertex
+		 * attribute format.
+		 */
+		unsigned num_format_channels = get_num_channels_from_data_format(data_format);
+		unsigned num_channels = MIN2(num_input_channels, num_format_channels);
+
 		input = ac_build_buffer_load_format(&ctx->ac, t_list,
 						    buffer_index,
 						    ctx->ac.i32_0,
 						    num_channels, false, true);
 
-		input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels);
+		input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
+							is_float);
 
 		for (unsigned chan = 0; chan < 4; chan++) {
 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);