summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2015-08-03 01:34:32 +0200
committerMarek Olšák <[email protected]>2015-08-06 20:44:36 +0200
commit60159bcfc66a067b50da06f5cabfa20d72e898ed (patch)
treed44f652dfc4411f3aa040e8975fdc3d0bcfc88fb /src/gallium
parentc2a5d1dcb14acbd2db4a674453a8622d4b9a572a (diff)
radeonsi: before storing tess levels, load them from LDS instead of temporary
Also use only one store if stride <= 4. All the fetches from and stores to temporaries can be removed now. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91461 Reviewed-by: Michel Dänzer <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c136
1 files changed, 57 insertions, 79 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 92382e85985..61d36430bcf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -681,18 +681,8 @@ static LLVMValueRef fetch_output_tcs(
enum tgsi_opcode_type type, unsigned swizzle)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct si_shader *shader = si_shader_ctx->shader;
- struct tgsi_shader_info *info = &shader->selector->info;
- unsigned name = info->output_semantic_name[reg->Register.Index];
LLVMValueRef dw_addr, stride;
- /* Just read the local temp "output" register to get TESSOUTER/INNER. */
- if (!reg->Register.Indirect &&
- (name == TGSI_SEMANTIC_TESSOUTER ||
- name == TGSI_SEMANTIC_TESSINNER)) {
- return radeon_llvm_emit_fetch(bld_base, reg, type, swizzle);
- }
-
if (reg->Register.Dimension) {
stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
@@ -731,8 +721,6 @@ static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
LLVMValueRef dst[4])
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct si_shader *shader = si_shader_ctx->shader;
- struct tgsi_shader_info *sinfo = &shader->selector->info;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
unsigned chan_index;
LLVMValueRef dw_addr, stride;
@@ -746,14 +734,6 @@ static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
return;
}
- /* Write tessellation levels to "output" temp registers.
- * Also write them to LDS as per-patch outputs (below).
- */
- if (!reg->Register.Indirect &&
- (sinfo->output_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_TESSINNER ||
- sinfo->output_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_TESSOUTER))
- radeon_llvm_emit_store(bld_base, inst, info, dst);
-
if (reg->Register.Dimension) {
stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
@@ -1854,58 +1834,78 @@ handle_semantic:
}
}
-static void si_write_tess_factors(struct si_shader_context *si_shader_ctx,
- unsigned name, LLVMValueRef *out_ptr)
+/* This only writes the tessellation factor levels. */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
- struct si_shader *shader = si_shader_ctx->shader;
- struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct si_shader *shader = si_shader_ctx->shader;
+ unsigned tess_inner_index, tess_outer_index;
+ LLVMValueRef lds_base, lds_inner, lds_outer;
LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
- LLVMValueRef output, out[4];
+ LLVMValueRef out[6], vec0, vec1, invocation_id;
unsigned stride, outer_comps, inner_comps, i;
+ struct lp_build_if_state if_ctx;
- if (name != TGSI_SEMANTIC_TESSOUTER &&
- name != TGSI_SEMANTIC_TESSINNER) {
- assert(0);
- return;
- }
+ invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+ /* Do this only for invocation 0, because the tess levels are per-patch,
+ * not per-vertex.
+ *
+ * This can't jump, because invocation 0 executes this. It should
+ * at least mask out the loads and stores for other invocations.
+ */
+ lp_build_if(&if_ctx, gallivm,
+ LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ invocation_id, bld_base->uint_bld.zero, ""));
+
+ /* Determine the layout of one tess factor element in the buffer. */
switch (shader->key.tcs.prim_mode) {
case PIPE_PRIM_LINES:
- stride = 2;
+ stride = 2; /* 2 dwords, 1 vec2 store */
outer_comps = 2;
inner_comps = 0;
break;
case PIPE_PRIM_TRIANGLES:
- stride = 4;
+ stride = 4; /* 4 dwords, 1 vec4 store */
outer_comps = 3;
inner_comps = 1;
break;
case PIPE_PRIM_QUADS:
- stride = 6;
+ stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
outer_comps = 4;
inner_comps = 2;
break;
default:
assert(0);
+ return;
}
- /* Load the outputs as i32. */
- for (i = 0; i < 4; i++)
- out[i] = LLVMBuildBitCast(gallivm->builder,
- LLVMBuildLoad(gallivm->builder, out_ptr[i], ""),
- bld_base->uint_bld.elem_type, "");
-
- /* Convert the outputs to vectors. */
- if (name == TGSI_SEMANTIC_TESSOUTER)
- output = lp_build_gather_values(gallivm, out,
- util_next_power_of_two(outer_comps));
- else if (inner_comps > 1)
- output = lp_build_gather_values(gallivm, out, inner_comps);
- else if (inner_comps == 1)
- output = out[0];
- else
- return;
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
+ lp_build_const_int32(gallivm,
+ tess_inner_index * 4), "");
+ lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
+ lp_build_const_int32(gallivm,
+ tess_outer_index * 4), "");
+
+ for (i = 0; i < outer_comps; i++)
+ out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+ for (i = 0; i < inner_comps; i++)
+ out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+
+ /* Convert the outputs to vectors for stores. */
+ vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+ vec1 = NULL;
+
+ if (stride > 4)
+ vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
/* Get the buffer. */
rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -1913,22 +1913,20 @@ static void si_write_tess_factors(struct si_shader_context *si_shader_ctx,
buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));
- /* Get offsets. */
+ /* Get the offset. */
tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
SI_PARAM_TESS_FACTOR_OFFSET);
rel_patch_id = get_rel_patch_id(si_shader_ctx);
byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
lp_build_const_int32(gallivm, 4 * stride), "");
- /* Store the output. */
- if (name == TGSI_SEMANTIC_TESSOUTER) {
- build_tbuffer_store_dwords(si_shader_ctx, buffer, output,
- outer_comps, byteoffset, tf_base, 0);
- } else if (inner_comps) {
- build_tbuffer_store_dwords(si_shader_ctx, buffer, output,
- inner_comps, byteoffset, tf_base,
- outer_comps * 4);
- }
+ /* Store the outputs. */
+ build_tbuffer_store_dwords(si_shader_ctx, buffer, vec0,
+ MIN2(stride, 4), byteoffset, tf_base, 0);
+ if (vec1)
+ build_tbuffer_store_dwords(si_shader_ctx, buffer, vec1,
+ stride - 4, byteoffset, tf_base, 16);
+ lp_build_endif(&if_ctx);
}
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
@@ -1962,26 +1960,6 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
}
}
-static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context * bld_base)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct si_shader *shader = si_shader_ctx->shader;
- struct tgsi_shader_info *info = &shader->selector->info;
- unsigned i;
-
- /* Only write tessellation factors. Other outputs have already been
- * written to LDS by instructions. */
- for (i = 0; i < info->num_outputs; i++) {
- LLVMValueRef *out_ptr = si_shader_ctx->radeon_bld.soa.outputs[i];
- unsigned name = info->output_semantic_name[i];
-
- if (name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER) {
- si_write_tess_factors(si_shader_ctx, name, out_ptr);
- }
- }
-}
-
static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);