about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Rhys Perry <[email protected]> 2019-12-13 13:23:27 +0000
committer Rhys Perry <[email protected]> 2020-01-28 11:40:01 +0000
commit d9e357e35ba5364dd414684df7dd565adfe01592 (patch)
tree 490c906e44bc107df244a0cb43c04ab58d6b8bfd /src
parent 525b1073474e070c8ade47856e649747ed12f775 (diff)
aco: skip unused channels at the start when fetching vertices
pipeline-db (Vega):
Totals from affected shaders:
SGPRS: 161320 -> 161224 (-0.06 %)
VGPRS: 153968 -> 149408 (-2.96 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4331496 -> 4331308 (-0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 27814 -> 28594 (2.80 %)

pipeline-db (Navi):
Totals from affected shaders:
SGPRS: 161504 -> 161408 (-0.06 %)
VGPRS: 153836 -> 149440 (-2.86 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4327572 -> 4327604 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 27837 -> 28618 (2.81 %)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3086>
Diffstat (limited to 'src')
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 11
1 files changed, 10 insertions, 1 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 11f7805b56d..0847d5b6ca8 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3214,6 +3214,15 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
unsigned channel_start = 0;
bool direct_fetch = false;
+ /* skip unused channels at the start */
+ if (vtx_info->chan_byte_size && !post_shuffle) {
+ channel_start = ffs(mask) - 1;
+ for (unsigned i = 0; i < channel_start; i++)
+ channels[i] = Temp(0, s1);
+ } else if (vtx_info->chan_byte_size && post_shuffle && !(mask & 0x8)) {
+ num_channels = 3 - (ffs(mask) - 1);
+ }
+
/* load channels */
while (channel_start < num_channels) {
unsigned fetch_size = num_channels - channel_start;
@@ -3290,7 +3299,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
unsigned num_temp = 0;
for (unsigned i = 0; i < dst.size(); i++) {
unsigned idx = i + component;
- if (idx < num_channels && channels[swizzle[idx]].id()) {
+ if (swizzle[idx] < num_channels && channels[swizzle[idx]].id()) {
Temp channel = channels[swizzle[idx]];
if (idx == 3 && alpha_adjust != RADV_ALPHA_ADJUST_NONE)
channel = adjust_vertex_fetch_alpha(ctx, alpha_adjust, channel);