summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2018-01-30 18:34:25 +0100
committer: Marek Olšák <[email protected]> 2018-02-01 16:20:19 +0100
commit: be973ed21f6e456ebd753f26a99151d9ea6e765c (patch)
tree: 1f08168ae096270918076f69b4a21d766b55efb7 /src/gallium
parent: 472361dd7e2f82e3697047776967544348b4012a (diff)
radeonsi: load the right number of components for VS inputs and TBOs
The supported component counts are 1, 2, and 4 (a count of 3 is loaded as 4).

The following snippet loads float, vec2, vec3, and vec4:

Before:
    buffer_load_format_x v9, v4, s[0:3], 0 idxen ; E0002000 80000904
    buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005
    s_waitcnt vmcnt(0) ; BF8C0F70
    buffer_load_format_xyzw v[2:5], v6, s[12:15], 0 idxen ; E00C2000 80030206
    s_waitcnt vmcnt(0) ; BF8C0F70
    buffer_load_format_xyzw v[5:8], v7, s[4:7], 0 idxen ; E00C2000 80010507

After:
    buffer_load_format_x v10, v4, s[0:3], 0 idxen ; E0002000 80000A04
    buffer_load_format_xy v[8:9], v5, s[8:11], 0 idxen ; E0042000 80020805
    buffer_load_format_xyzw v[0:3], v6, s[12:15], 0 idxen ; E00C2000 80030006
    s_waitcnt vmcnt(0) ; BF8C0F70
    buffer_load_format_xyzw v[3:6], v7, s[4:7], 0 idxen ; E00C2000 80010307

Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 13
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 8
2 files changed, 16 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 9458920b9d7..78156f1edec 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -480,8 +480,8 @@ void si_llvm_load_input_vs(
unsigned input_index,
LLVMValueRef out[4])
{
- unsigned vs_blit_property =
- ctx->shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
+ const struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ unsigned vs_blit_property = info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
if (vs_blit_property) {
LLVMValueRef vertex_id = ctx->abi.vertex_id;
@@ -555,6 +555,7 @@ void si_llvm_load_input_vs(
unsigned fix_fetch;
unsigned num_fetches;
unsigned fetch_stride;
+ unsigned num_channels;
LLVMValueRef t_list_ptr;
LLVMValueRef t_offset;
@@ -580,24 +581,29 @@ void si_llvm_load_input_vs(
case SI_FIX_FETCH_RGB_64_FLOAT:
num_fetches = 3; /* 3 2-dword loads */
fetch_stride = 8;
+ num_channels = 2;
break;
case SI_FIX_FETCH_RGBA_64_FLOAT:
num_fetches = 2; /* 2 4-dword loads */
fetch_stride = 16;
+ num_channels = 4;
break;
case SI_FIX_FETCH_RGB_8:
case SI_FIX_FETCH_RGB_8_INT:
num_fetches = 3;
fetch_stride = 1;
+ num_channels = 1;
break;
case SI_FIX_FETCH_RGB_16:
case SI_FIX_FETCH_RGB_16_INT:
num_fetches = 3;
fetch_stride = 2;
+ num_channels = 1;
break;
default:
num_fetches = 1;
fetch_stride = 0;
+ num_channels = util_last_bit(info->input_usage_mask[input_index]);
}
for (unsigned i = 0; i < num_fetches; i++) {
@@ -605,7 +611,8 @@ void si_llvm_load_input_vs(
input[i] = ac_build_buffer_load_format(&ctx->ac, t_list,
vertex_index, voffset,
- 4, true);
+ num_channels, true);
+ input[i] = ac_build_expand_to_vec4(&ctx->ac, input[i], num_channels);
}
/* Break up the vec4 into individual components */
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index c80dc8ef0ee..42cb3251e78 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1821,12 +1821,16 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
unsigned target = inst->Texture.Texture;
if (target == TGSI_TEXTURE_BUFFER) {
- emit_data->output[emit_data->chan] =
+ unsigned num_channels =
+ util_last_bit(inst->Dst[0].Register.WriteMask);
+ LLVMValueRef result =
ac_build_buffer_load_format(&ctx->ac,
emit_data->args[0],
emit_data->args[2],
emit_data->args[1],
- 4, true);
+ num_channels, true);
+ emit_data->output[emit_data->chan] =
+ ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
return;
}