summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <[email protected]>2017-07-04 00:49:55 +0200
committerBas Nieuwenhuizen <[email protected]>2017-07-05 20:23:00 +0200
commit860a8e6b99b27b50d3545a4077afcaf0fcba264a (patch)
treeda82f4c705065c77e3585f8071deccf7bfc28086
parent3d527ba19b2f2ff3ee379818017adbdf6894ceeb (diff)
ac/nir: Move VS position exports before param exports.
According to Nicolai the SX can already start work when all the position exports are done, so do those first. Signed-off-by: Bas Nieuwenhuizen <[email protected]> Reviewed-by: Dave Airlie <[email protected]>
-rw-r--r--src/amd/common/ac_nir_to_llvm.c109
1 files changed, 54 insertions, 55 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e72747ab78a..beafd5685f3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5245,66 +5245,30 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
}
- for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef values[4];
- if (!(ctx->output_mask & (1ull << i)))
- continue;
-
+ LLVMValueRef pos_values[4] = {ctx->f32zero, ctx->f32zero, ctx->f32zero, ctx->f32one};
+ if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
for (unsigned j = 0; j < 4; j++)
- values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
-
- if (i == VARYING_SLOT_POS) {
- target = V_008DFC_SQ_EXP_POS;
- } else if (i == VARYING_SLOT_CLIP_DIST0) {
- continue;
- } else if (i == VARYING_SLOT_PSIZ) {
- outinfo->writes_pointsize = true;
- psize_value = values[0];
- continue;
- } else if (i == VARYING_SLOT_LAYER) {
- outinfo->writes_layer = true;
- layer_value = values[0];
- target = V_008DFC_SQ_EXP_PARAM + param_count;
- outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
- param_count++;
- } else if (i == VARYING_SLOT_VIEWPORT) {
- outinfo->writes_viewport_index = true;
- viewport_index_value = values[0];
- continue;
- } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
- target = V_008DFC_SQ_EXP_PARAM + param_count;
- outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
- param_count++;
- } else if (i >= VARYING_SLOT_VAR0) {
- outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
- target = V_008DFC_SQ_EXP_PARAM + param_count;
- outinfo->vs_output_param_offset[i] = param_count;
- param_count++;
- }
+ pos_values[j] = LLVMBuildLoad(ctx->builder,
+ ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
+ }
+ si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
- si_llvm_init_export_args(ctx, values, target, &args);
+ if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
+ outinfo->writes_pointsize = true;
+ psize_value = LLVMBuildLoad(ctx->builder,
+ ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
+ }
- if (target >= V_008DFC_SQ_EXP_POS &&
- target <= (V_008DFC_SQ_EXP_POS + 3)) {
- memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
- &args, sizeof(args));
- } else {
- ac_build_export(&ctx->ac, &args);
- }
+ if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
+ outinfo->writes_layer = true;
+ layer_value = LLVMBuildLoad(ctx->builder,
+ ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
}
- /* We need to add the position output manually if it's missing. */
- if (!pos_args[0].out[0]) {
- pos_args[0].enabled_channels = 0xf;
- pos_args[0].valid_mask = 0;
- pos_args[0].done = 0;
- pos_args[0].target = V_008DFC_SQ_EXP_POS;
- pos_args[0].compr = 0;
- pos_args[0].out[0] = ctx->f32zero; /* X */
- pos_args[0].out[1] = ctx->f32zero; /* Y */
- pos_args[0].out[2] = ctx->f32zero; /* Z */
- pos_args[0].out[3] = ctx->f32one; /* W */
+ if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
+ outinfo->writes_viewport_index = true;
+ viewport_index_value = LLVMBuildLoad(ctx->builder,
+ ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
}
uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
@@ -5345,6 +5309,41 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
ac_build_export(&ctx->ac, &pos_args[i]);
}
+ for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef values[4];
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ for (unsigned j = 0; j < 4; j++)
+ values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
+
+ if (i == VARYING_SLOT_LAYER) {
+ target = V_008DFC_SQ_EXP_PARAM + param_count;
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
+ param_count++;
+ } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
+ target = V_008DFC_SQ_EXP_PARAM + param_count;
+ outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
+ param_count++;
+ } else if (i >= VARYING_SLOT_VAR0) {
+ outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
+ target = V_008DFC_SQ_EXP_PARAM + param_count;
+ outinfo->vs_output_param_offset[i] = param_count;
+ param_count++;
+ } else
+ continue;
+
+ si_llvm_init_export_args(ctx, values, target, &args);
+
+ if (target >= V_008DFC_SQ_EXP_POS &&
+ target <= (V_008DFC_SQ_EXP_POS + 3)) {
+ memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
+ &args, sizeof(args));
+ } else {
+ ac_build_export(&ctx->ac, &args);
+ }
+ }
if (export_prim_id) {
LLVMValueRef values[4];