diff options
author | Bas Nieuwenhuizen <[email protected]> | 2017-07-04 00:49:55 +0200 |
---|---|---|
committer | Bas Nieuwenhuizen <[email protected]> | 2017-07-05 20:23:00 +0200 |
commit | 860a8e6b99b27b50d3545a4077afcaf0fcba264a (patch) | |
tree | da82f4c705065c77e3585f8071deccf7bfc28086 /src | |
parent | 3d527ba19b2f2ff3ee379818017adbdf6894ceeb (diff) |
ac/nir: Move VS position exports before param exports.
According to Nicolai, the SX can already start work when all
the position exports are done, so do those first.
Signed-off-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 109 |
1 file changed, 54 insertions, 55 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index e72747ab78a..beafd5685f3 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -5245,66 +5245,30 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, } - for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - if (!(ctx->output_mask & (1ull << i))) - continue; - + LLVMValueRef pos_values[4] = {ctx->f32zero, ctx->f32zero, ctx->f32zero, ctx->f32one}; + if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { for (unsigned j = 0; j < 4; j++) - values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); - - if (i == VARYING_SLOT_POS) { - target = V_008DFC_SQ_EXP_POS; - } else if (i == VARYING_SLOT_CLIP_DIST0) { - continue; - } else if (i == VARYING_SLOT_PSIZ) { - outinfo->writes_pointsize = true; - psize_value = values[0]; - continue; - } else if (i == VARYING_SLOT_LAYER) { - outinfo->writes_layer = true; - layer_value = values[0]; - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count; - param_count++; - } else if (i == VARYING_SLOT_VIEWPORT) { - outinfo->writes_viewport_index = true; - viewport_index_value = values[0]; - continue; - } else if (i == VARYING_SLOT_PRIMITIVE_ID) { - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; - param_count++; - } else if (i >= VARYING_SLOT_VAR0) { - outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[i] = param_count; - param_count++; - } + pos_values[j] = LLVMBuildLoad(ctx->builder, + ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], ""); + } + si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]); - si_llvm_init_export_args(ctx, values, target, &args); + if (ctx->output_mask & (1ull 
<< VARYING_SLOT_PSIZ)) { + outinfo->writes_pointsize = true; + psize_value = LLVMBuildLoad(ctx->builder, + ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], ""); + } - if (target >= V_008DFC_SQ_EXP_POS && - target <= (V_008DFC_SQ_EXP_POS + 3)) { - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - } else { - ac_build_export(&ctx->ac, &args); - } + if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { + outinfo->writes_layer = true; + layer_value = LLVMBuildLoad(ctx->builder, + ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], ""); } - /* We need to add the position output manually if it's missing. */ - if (!pos_args[0].out[0]) { - pos_args[0].enabled_channels = 0xf; - pos_args[0].valid_mask = 0; - pos_args[0].done = 0; - pos_args[0].target = V_008DFC_SQ_EXP_POS; - pos_args[0].compr = 0; - pos_args[0].out[0] = ctx->f32zero; /* X */ - pos_args[0].out[1] = ctx->f32zero; /* Y */ - pos_args[0].out[2] = ctx->f32zero; /* Z */ - pos_args[0].out[3] = ctx->f32one; /* W */ + if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { + outinfo->writes_viewport_index = true; + viewport_index_value = LLVMBuildLoad(ctx->builder, + ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], ""); } uint32_t mask = ((outinfo->writes_pointsize == true ? 
1 : 0) | @@ -5345,6 +5309,41 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, ac_build_export(&ctx->ac, &pos_args[i]); } + for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef values[4]; + if (!(ctx->output_mask & (1ull << i))) + continue; + + for (unsigned j = 0; j < 4; j++) + values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + + if (i == VARYING_SLOT_LAYER) { + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count; + param_count++; + } else if (i == VARYING_SLOT_PRIMITIVE_ID) { + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; + param_count++; + } else if (i >= VARYING_SLOT_VAR0) { + outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[i] = param_count; + param_count++; + } else + continue; + + si_llvm_init_export_args(ctx, values, target, &args); + + if (target >= V_008DFC_SQ_EXP_POS && + target <= (V_008DFC_SQ_EXP_POS + 3)) { + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); + } else { + ac_build_export(&ctx->ac, &args); + } + } if (export_prim_id) { LLVMValueRef values[4]; |