diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 111 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_internal.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 3 |
5 files changed, 89 insertions, 30 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index b8932891e4c..212d1bb95a8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -1139,7 +1139,6 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, if (main_block_tf_writemask || cond_block_tf_writemask) { /* Accumulate the result: */ out->tessfactors_are_def_in_all_invocs &= - main_block_tf_writemask && !(cond_block_tf_writemask & ~main_block_tf_writemask); /* Analyze the next code segment from scratch. */ @@ -1155,7 +1154,6 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, /* Accumulate the result for the last code segment separated by a barrier. */ if (main_block_tf_writemask || cond_block_tf_writemask) { out->tessfactors_are_def_in_all_invocs &= - main_block_tf_writemask && !(cond_block_tf_writemask & ~main_block_tf_writemask); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e7888e6012b..43619dd329d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1150,7 +1150,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, LLVMValueRef buffer, base, buf_addr; LLVMValueRef values[4]; bool skip_lds_store; - bool is_tess_factor = false; + bool is_tess_factor = false, is_tess_inner = false; /* Only handle per-patch and per-vertex outputs here. * Vectors will be lowered to scalars and this function will be called again. @@ -1177,8 +1177,11 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, /* Always write tess factors into LDS for the TCS epilog. */ if (name == TGSI_SEMANTIC_TESSINNER || name == TGSI_SEMANTIC_TESSOUTER) { - skip_lds_store = false; + /* The epilog doesn't read LDS if invocation 0 defines tess factors. */ + skip_lds_store = !sh_info->reads_tessfactor_outputs && + ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs; is_tess_factor = true; + is_tess_inner = name == TGSI_SEMANTIC_TESSINNER; } } } @@ -1207,6 +1210,18 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, buf_addr, base, 4 * chan_index, 1, 0, true, false); } + + /* Write tess factors into VGPRs for the epilog. */ + if (is_tess_factor && + ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) { + if (!is_tess_inner) { + LLVMBuildStore(gallivm->builder, value, /* outer */ + ctx->invoc0_tess_factors[chan_index]); + } else if (chan_index < 2) { + LLVMBuildStore(gallivm->builder, value, /* inner */ + ctx->invoc0_tess_factors[4 + chan_index]); + } + } } if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) { @@ -2671,7 +2686,9 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, - LLVMValueRef tcs_out_current_patch_data_offset) + LLVMValueRef tcs_out_current_patch_data_offset, + LLVMValueRef invoc0_tf_outer[4], + LLVMValueRef invoc0_tf_inner[2]) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = &ctx->gallivm; @@ -2682,7 +2699,9 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, unsigned stride, outer_comps, inner_comps, i, offset; struct lp_build_if_state if_ctx, inner_if_ctx; - si_llvm_emit_barrier(NULL, bld_base, NULL); + /* Add a barrier before loading tess factors from LDS. */ + if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) + si_llvm_emit_barrier(NULL, bld_base, NULL); /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. @@ -2716,32 +2735,32 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, return; } - /* Load tess_inner and tess_outer from LDS. - * Any invocation can write them, so we can't get them from a temporary. - */ - tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0); - tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0); - - lds_base = tcs_out_current_patch_data_offset; - lds_inner = LLVMBuildAdd(gallivm->builder, lds_base, - LLVMConstInt(ctx->i32, - tess_inner_index * 4, 0), ""); - lds_outer = LLVMBuildAdd(gallivm->builder, lds_base, - LLVMConstInt(ctx->i32, - tess_outer_index * 4, 0), ""); - for (i = 0; i < 4; i++) { inner[i] = LLVMGetUndef(ctx->i32); outer[i] = LLVMGetUndef(ctx->i32); } - if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) { - /* For isolines, the hardware expects tess factors in the - * reverse order from what GLSL / TGSI specify. - */ - outer[0] = out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer); - outer[1] = out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer); + if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) { + /* Tess factors are in VGPRs. */ + for (i = 0; i < outer_comps; i++) + outer[i] = out[i] = invoc0_tf_outer[i]; + for (i = 0; i < inner_comps; i++) + inner[i] = out[outer_comps+i] = invoc0_tf_inner[i]; } else { + /* Load tess_inner and tess_outer from LDS. + * Any invocation can write them, so we can't get them from a temporary. + */ + tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0); + tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0); + + lds_base = tcs_out_current_patch_data_offset; + lds_inner = LLVMBuildAdd(gallivm->builder, lds_base, + LLVMConstInt(ctx->i32, + tess_inner_index * 4, 0), ""); + lds_outer = LLVMBuildAdd(gallivm->builder, lds_base, + LLVMConstInt(ctx->i32, + tess_outer_index * 4, 0), ""); + for (i = 0; i < outer_comps; i++) { outer[i] = out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer); @@ -2752,6 +2771,15 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, } } + if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) { + /* For isolines, the hardware expects tess factors in the + * reverse order from what GLSL / TGSI specify. + */ + LLVMValueRef tmp = out[0]; + out[0] = out[1]; + out[1] = tmp; + } + /* Convert the outputs to vectors for stores. */ vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4)); vec1 = NULL; @@ -2946,7 +2974,18 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); - ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); + + if (ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) { + vgpr++; /* skip the tess factor LDS offset */ + for (unsigned i = 0; i < 6; i++) { + LLVMValueRef value = + LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], ""); + value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); + ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, ""); + } + } else { + ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); + } ctx->return_value = ret; } @@ -4330,7 +4369,7 @@ static void create_function(struct si_shader_context *ctx) */ for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ - for (i = 0; i < 5; i++) + for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ break; @@ -4387,7 +4426,7 @@ static void create_function(struct si_shader_context *ctx) */ for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ - for (i = 0; i < 5; i++) + for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } break; @@ -5692,6 +5731,14 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, } } + if (ctx->type == PIPE_SHADER_TESS_CTRL && + sel->tcs_info.tessfactors_are_def_in_all_invocs) { + for (unsigned i = 0; i < 6; i++) { + ctx->invoc0_tess_factors[i] = + lp_build_alloca_undef(&ctx->gallivm, ctx->i32, ""); + } + } + if (ctx->type == PIPE_SHADER_GEOMETRY) { int i; for (i = 0; i < 4; i++) { @@ -6926,16 +6973,24 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_VGPR, ctx->i32); /* invocation ID within the patch */ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* LDS offset where tess factors should be loaded from */ + for (unsigned i = 0; i < 6; i++) + add_arg(&fninfo, ARG_VGPR, ctx->i32); /* tess factors */ + /* Create the function. */ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, ctx->screen->b.chip_class >= CIK ? 128 : 64); declare_lds_as_pointer(ctx); func = ctx->main_fn; + LLVMValueRef invoc0_tess_factors[6]; + for (unsigned i = 0; i < 6; i++) + invoc0_tess_factors[i] = LLVMGetParam(func, tess_factors_idx + 3 + i); + si_write_tess_factors(bld_base, LLVMGetParam(func, tess_factors_idx), LLVMGetParam(func, tess_factors_idx + 1), - LLVMGetParam(func, tess_factors_idx + 2)); + LLVMGetParam(func, tess_factors_idx + 2), + invoc0_tess_factors, invoc0_tess_factors + 4); LLVMBuildRetVoid(gallivm->builder); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ee6b0c167f9..4592ac551ce 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -327,6 +327,7 @@ struct si_shader_selector { struct nir_shader *nir; struct pipe_stream_output_info so; struct tgsi_shader_info info; + struct tgsi_tessctrl_info tcs_info; /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; @@ -404,6 +405,7 @@ struct si_vs_prolog_bits { /* Common TCS bits between the shader key and the epilog key. */ struct si_tcs_epilog_bits { unsigned prim_mode:3; + unsigned invoc0_tess_factors_are_def:1; unsigned tes_reads_tess_factors:1; }; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index ad29ab7e845..023f9a6a093 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -209,6 +209,7 @@ struct si_shader_context { LLVMValueRef gsvs_ring[4]; LLVMValueRef lds; + LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */ LLVMValueRef gs_next_vertex[4]; LLVMValueRef postponed_kill; LLVMValueRef return_value; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 9f76551cfbb..6398111e5a6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1301,6 +1301,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->part.tcs.epilog.prim_mode = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + key->part.tcs.epilog.invoc0_tess_factors_are_def = + sel->tcs_info.tessfactors_are_def_in_all_invocs; key->part.tcs.epilog.tes_reads_tess_factors = sctx->tes_shader.cso->info.reads_tess_factors; @@ -2004,6 +2006,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, } tgsi_scan_shader(state->tokens, &sel->info); + tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info); } else { assert(state->type == PIPE_SHADER_IR_NIR); |