summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_scan.c 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 111
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader_internal.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 3
5 files changed, 89 insertions, 30 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index b8932891e4c..212d1bb95a8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -1139,7 +1139,6 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
if (main_block_tf_writemask || cond_block_tf_writemask) {
/* Accumulate the result: */
out->tessfactors_are_def_in_all_invocs &=
- main_block_tf_writemask &&
!(cond_block_tf_writemask & ~main_block_tf_writemask);
/* Analyze the next code segment from scratch. */
@@ -1155,7 +1154,6 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
/* Accumulate the result for the last code segment separated by a barrier. */
if (main_block_tf_writemask || cond_block_tf_writemask) {
out->tessfactors_are_def_in_all_invocs &=
- main_block_tf_writemask &&
!(cond_block_tf_writemask & ~main_block_tf_writemask);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e7888e6012b..43619dd329d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1150,7 +1150,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
LLVMValueRef buffer, base, buf_addr;
LLVMValueRef values[4];
bool skip_lds_store;
- bool is_tess_factor = false;
+ bool is_tess_factor = false, is_tess_inner = false;
/* Only handle per-patch and per-vertex outputs here.
* Vectors will be lowered to scalars and this function will be called again.
@@ -1177,8 +1177,11 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
/* Always write tess factors into LDS for the TCS epilog. */
if (name == TGSI_SEMANTIC_TESSINNER ||
name == TGSI_SEMANTIC_TESSOUTER) {
- skip_lds_store = false;
+ /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
+ skip_lds_store = !sh_info->reads_tessfactor_outputs &&
+ ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
is_tess_factor = true;
+ is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
}
}
}
@@ -1207,6 +1210,18 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
buf_addr, base,
4 * chan_index, 1, 0, true, false);
}
+
+ /* Write tess factors into VGPRs for the epilog. */
+ if (is_tess_factor &&
+ ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
+ if (!is_tess_inner) {
+ LLVMBuildStore(gallivm->builder, value, /* outer */
+ ctx->invoc0_tess_factors[chan_index]);
+ } else if (chan_index < 2) {
+ LLVMBuildStore(gallivm->builder, value, /* inner */
+ ctx->invoc0_tess_factors[4 + chan_index]);
+ }
+ }
}
if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
@@ -2671,7 +2686,9 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
LLVMValueRef rel_patch_id,
LLVMValueRef invocation_id,
- LLVMValueRef tcs_out_current_patch_data_offset)
+ LLVMValueRef tcs_out_current_patch_data_offset,
+ LLVMValueRef invoc0_tf_outer[4],
+ LLVMValueRef invoc0_tf_inner[2])
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
@@ -2682,7 +2699,9 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
unsigned stride, outer_comps, inner_comps, i, offset;
struct lp_build_if_state if_ctx, inner_if_ctx;
- si_llvm_emit_barrier(NULL, bld_base, NULL);
+ /* Add a barrier before loading tess factors from LDS. */
+ if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
+ si_llvm_emit_barrier(NULL, bld_base, NULL);
/* Do this only for invocation 0, because the tess levels are per-patch,
* not per-vertex.
@@ -2716,32 +2735,32 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
return;
}
- /* Load tess_inner and tess_outer from LDS.
- * Any invocation can write them, so we can't get them from a temporary.
- */
- tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
- tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
-
- lds_base = tcs_out_current_patch_data_offset;
- lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_inner_index * 4, 0), "");
- lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_outer_index * 4, 0), "");
-
for (i = 0; i < 4; i++) {
inner[i] = LLVMGetUndef(ctx->i32);
outer[i] = LLVMGetUndef(ctx->i32);
}
- if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
- /* For isolines, the hardware expects tess factors in the
- * reverse order from what GLSL / TGSI specify.
- */
- outer[0] = out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer);
- outer[1] = out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer);
+ if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
+ /* Tess factors are in VGPRs. */
+ for (i = 0; i < outer_comps; i++)
+ outer[i] = out[i] = invoc0_tf_outer[i];
+ for (i = 0; i < inner_comps; i++)
+ inner[i] = out[outer_comps+i] = invoc0_tf_inner[i];
} else {
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = tcs_out_current_patch_data_offset;
+ lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
+ LLVMConstInt(ctx->i32,
+ tess_inner_index * 4, 0), "");
+ lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
+ LLVMConstInt(ctx->i32,
+ tess_outer_index * 4, 0), "");
+
for (i = 0; i < outer_comps; i++) {
outer[i] = out[i] =
lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
@@ -2752,6 +2771,15 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
}
}
+ if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
+ /* For isolines, the hardware expects tess factors in the
+ * reverse order from what GLSL / TGSI specify.
+ */
+ LLVMValueRef tmp = out[0];
+ out[0] = out[1];
+ out[1] = tmp;
+ }
+
/* Convert the outputs to vectors for stores. */
vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
vec1 = NULL;
@@ -2946,7 +2974,18 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
- ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+
+ if (ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
+ vgpr++; /* skip the tess factor LDS offset */
+ for (unsigned i = 0; i < 6; i++) {
+ LLVMValueRef value =
+ LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
+ value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+ ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
+ }
+ } else {
+ ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+ }
ctx->return_value = ret;
}
@@ -4330,7 +4369,7 @@ static void create_function(struct si_shader_context *ctx)
*/
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
- for (i = 0; i < 5; i++)
+ for (i = 0; i < 11; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
break;
@@ -4387,7 +4426,7 @@ static void create_function(struct si_shader_context *ctx)
*/
for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
- for (i = 0; i < 5; i++)
+ for (i = 0; i < 11; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
}
break;
@@ -5692,6 +5731,14 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
}
}
+ if (ctx->type == PIPE_SHADER_TESS_CTRL &&
+ sel->tcs_info.tessfactors_are_def_in_all_invocs) {
+ for (unsigned i = 0; i < 6; i++) {
+ ctx->invoc0_tess_factors[i] =
+ lp_build_alloca_undef(&ctx->gallivm, ctx->i32, "");
+ }
+ }
+
if (ctx->type == PIPE_SHADER_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
@@ -6926,16 +6973,24 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
add_arg(&fninfo, ARG_VGPR, ctx->i32); /* invocation ID within the patch */
add_arg(&fninfo, ARG_VGPR, ctx->i32); /* LDS offset where tess factors should be loaded from */
+ for (unsigned i = 0; i < 6; i++)
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* tess factors */
+
/* Create the function. */
si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
ctx->screen->b.chip_class >= CIK ? 128 : 64);
declare_lds_as_pointer(ctx);
func = ctx->main_fn;
+ LLVMValueRef invoc0_tess_factors[6];
+ for (unsigned i = 0; i < 6; i++)
+ invoc0_tess_factors[i] = LLVMGetParam(func, tess_factors_idx + 3 + i);
+
si_write_tess_factors(bld_base,
LLVMGetParam(func, tess_factors_idx),
LLVMGetParam(func, tess_factors_idx + 1),
- LLVMGetParam(func, tess_factors_idx + 2));
+ LLVMGetParam(func, tess_factors_idx + 2),
+ invoc0_tess_factors, invoc0_tess_factors + 4);
LLVMBuildRetVoid(gallivm->builder);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ee6b0c167f9..4592ac551ce 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -327,6 +327,7 @@ struct si_shader_selector {
struct nir_shader *nir;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
+ struct tgsi_tessctrl_info tcs_info;
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
@@ -404,6 +405,7 @@ struct si_vs_prolog_bits {
/* Common TCS bits between the shader key and the epilog key. */
struct si_tcs_epilog_bits {
unsigned prim_mode:3;
+ unsigned invoc0_tess_factors_are_def:1;
unsigned tes_reads_tess_factors:1;
};
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index ad29ab7e845..023f9a6a093 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -209,6 +209,7 @@ struct si_shader_context {
LLVMValueRef gsvs_ring[4];
LLVMValueRef lds;
+ LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
LLVMValueRef gs_next_vertex[4];
LLVMValueRef postponed_kill;
LLVMValueRef return_value;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9f76551cfbb..6398111e5a6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1301,6 +1301,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->part.tcs.epilog.prim_mode =
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ key->part.tcs.epilog.invoc0_tess_factors_are_def =
+ sel->tcs_info.tessfactors_are_def_in_all_invocs;
key->part.tcs.epilog.tes_reads_tess_factors =
sctx->tes_shader.cso->info.reads_tess_factors;
@@ -2004,6 +2006,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
}
tgsi_scan_shader(state->tokens, &sel->info);
+ tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
} else {
assert(state->type == PIPE_SHADER_IR_NIR);