summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2015-02-22 15:09:35 +0100
committer: Marek Olšák <[email protected]> 2015-07-23 00:59:32 +0200
commit: fff16e4ad2cf51749e01e04805908effe49217d1 (patch)
tree: dee9830c79c40a113ea1e60239707fe251ea9868 /src
parent: 4805685b6fe6efb7891dbc6dbab6ae4edce7e19e (diff)
radeonsi: add shader code generation for tessellation
Reviewed-by: Michel Dänzer <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/radeon/radeon_llvm_emit.c | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 723
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h | 109
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h | 5
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c | 41
5 files changed, 851 insertions, 29 deletions
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 25580b6bd4c..973d6edff8a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -62,6 +62,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
switch (type) {
case TGSI_PROCESSOR_VERTEX:
+ case TGSI_PROCESSOR_TESS_CTRL:
+ case TGSI_PROCESSOR_TESS_EVAL:
llvm_type = RADEON_LLVM_SHADER_VS;
break;
case TGSI_PROCESSOR_GEOMETRY:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 9b03a53035d..cddd9a0e120 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -71,12 +71,17 @@ struct si_shader_context
int param_streamout_write_index;
int param_streamout_offset[4];
int param_vertex_id;
+ int param_rel_auto_id;
int param_instance_id;
+ int param_tes_u;
+ int param_tes_v;
+ int param_tes_rel_patch_id;
+ int param_tes_patch_id;
int param_es2gs_offset;
LLVMTargetMachineRef tm;
LLVMValueRef const_md;
LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
- LLVMValueRef ddxy_lds;
+ LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
@@ -133,6 +138,14 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
assert(index <= 63-4);
return 4 + index;
+ /* patch indices are completely separate and thus start from 0 */
+ case TGSI_SEMANTIC_TESSOUTER:
+ return 0;
+ case TGSI_SEMANTIC_TESSINNER:
+ return 1;
+ case TGSI_SEMANTIC_PATCH:
+ return 2 + index;
+
default:
/* Don't fail here. The result of this function is only used
* for LS, TCS, TES, and GS, where legacy GL semantics can't
@@ -210,6 +223,136 @@ static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
return value;
}
+/**
+ * Return the LLVM value holding the relative patch ID ("RelPatchID" in the
+ * LDS layout description) for the shader stage being compiled.
+ *
+ * TCS: extracted from SI_PARAM_REL_IDS with unpack_param(.., 0, 8)
+ *      (presumably the low 8 bits - confirm against unpack_param).
+ * TES: the function parameter recorded in param_tes_rel_patch_id.
+ *
+ * Any other stage is a caller error: asserts and returns NULL.
+ */
+static LLVMValueRef get_rel_patch_id(struct si_shader_context *si_shader_ctx)
+{
+	if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+		return unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 0, 8);
+
+	if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL)
+		return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				    si_shader_ctx->param_tes_rel_patch_id);
+
+	assert(0);
+	return NULL;
+}
+
+/* Tessellation shaders pass outputs to the next shader using LDS.
+ *
+ * LS outputs = TCS inputs
+ * TCS outputs = TES inputs
+ *
+ * The LDS layout is:
+ * - TCS inputs for patch 0
+ * - TCS inputs for patch 1
+ * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
+ * - ...
+ * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
+ * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
+ * - TCS outputs for patch 1
+ * - Per-patch TCS outputs for patch 1
+ * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
+ * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
+ * - ...
+ *
+ * All three shaders VS(LS), TCS, TES share the same LDS space.
+ */
+
+/**
+ * Return the stride between consecutive patches of TCS inputs.
+ *
+ * A vertex shader reads it from SI_PARAM_LS_OUT_LAYOUT and a tessellation
+ * control shader from SI_PARAM_TCS_IN_LAYOUT, in both cases through
+ * unpack_param(.., 0, 13). Other stages assert and get NULL.
+ */
+static LLVMValueRef
+get_tcs_in_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+	switch (si_shader_ctx->type) {
+	case TGSI_PROCESSOR_VERTEX:
+		return unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+
+	case TGSI_PROCESSOR_TESS_CTRL:
+		return unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+
+	default:
+		assert(0);
+		return NULL;
+	}
+}
+
+/**
+ * Return the stride between consecutive patches of TCS outputs, unpacked
+ * from SI_PARAM_TCS_OUT_LAYOUT with unpack_param(.., 0, 13).
+ */
+static LLVMValueRef
+get_tcs_out_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+	LLVMValueRef stride =
+		unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+
+	return stride;
+}
+
+/**
+ * Return the offset at which the TCS outputs of patch 0 start.
+ *
+ * The value is the SI_PARAM_TCS_OUT_OFFSETS field selected by
+ * unpack_param(.., 0, 16), multiplied by 4.
+ */
+static LLVMValueRef
+get_tcs_out_patch0_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_context *uint_bld =
+		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+	LLVMValueRef packed_offset =
+		unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_OFFSETS, 0, 16);
+
+	return lp_build_mul_imm(uint_bld, packed_offset, 4);
+}
+
+/**
+ * Return the offset at which the per-patch TCS outputs of patch 0 start.
+ *
+ * The value is the SI_PARAM_TCS_OUT_OFFSETS field selected by
+ * unpack_param(.., 16, 16), multiplied by 4.
+ */
+static LLVMValueRef
+get_tcs_out_patch0_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_context *uint_bld =
+		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+	LLVMValueRef packed_offset =
+		unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_OFFSETS, 16, 16);
+
+	return lp_build_mul_imm(uint_bld, packed_offset, 4);
+}
+
+/**
+ * Return the offset of the TCS inputs of the current patch:
+ * TCS input patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_in_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+	LLVMBuilderRef builder = si_shader_ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef stride = get_tcs_in_patch_stride(si_shader_ctx);
+	LLVMValueRef patch = get_rel_patch_id(si_shader_ctx);
+
+	return LLVMBuildMul(builder, stride, patch, "");
+}
+
+/**
+ * Return the offset of the TCS outputs of the current patch:
+ * patch 0 output offset + TCS output patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_out_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+	LLVMBuilderRef builder = si_shader_ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef first_patch = get_tcs_out_patch0_offset(si_shader_ctx);
+	LLVMValueRef stride = get_tcs_out_patch_stride(si_shader_ctx);
+	LLVMValueRef patch = get_rel_patch_id(si_shader_ctx);
+	LLVMValueRef patch_delta = LLVMBuildMul(builder, stride, patch, "");
+
+	return LLVMBuildAdd(builder, first_patch, patch_delta, "");
+}
+
+/**
+ * Return the offset of the per-patch TCS outputs of the current patch:
+ * patch 0 per-patch data offset + TCS output patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_out_current_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+	LLVMBuilderRef builder = si_shader_ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef first_patch_data =
+		get_tcs_out_patch0_patch_data_offset(si_shader_ctx);
+	LLVMValueRef stride = get_tcs_out_patch_stride(si_shader_ctx);
+	LLVMValueRef patch = get_rel_patch_id(si_shader_ctx);
+	LLVMValueRef patch_delta = LLVMBuildMul(builder, stride, patch, "");
+
+	return LLVMBuildAdd(builder, first_patch_data, patch_delta, "");
+}
+
+/**
+ * Build an LLVM indexed store using LLVMBuildGEP + LLVMBuildStore.
+ *
+ * The element pointer is formed with the GEP indices {0, index} applied to
+ * base_ptr, and value is stored through it.
+ */
+static void build_indexed_store(struct si_shader_context *si_shader_ctx,
+				LLVMValueRef base_ptr, LLVMValueRef index,
+				LLVMValueRef value)
+{
+	struct lp_build_tgsi_context *bld_base =
+		&si_shader_ctx->radeon_bld.soa.bld_base;
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMValueRef gep_indices[2] = { bld_base->uint_bld.zero, index };
+	LLVMValueRef elem_ptr;
+
+	elem_ptr = LLVMBuildGEP(builder, base_ptr, gep_indices, 2, "");
+	LLVMBuildStore(builder, value, elem_ptr);
+}
+
/**
* Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
* It's equivalent to doing a load from &base_ptr[index].
@@ -337,6 +480,12 @@ static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
return bld_base->uint_bld.zero;
switch (si_shader_ctx->type) {
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_PATCH_ID);
+ case TGSI_PROCESSOR_TESS_EVAL:
+ return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_tes_patch_id);
case TGSI_PROCESSOR_GEOMETRY:
return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
SI_PARAM_PRIMITIVE_ID);
@@ -346,6 +495,278 @@ static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
}
}
+/**
+ * Build the index described by a tgsi_ind_register.
+ *
+ * The address register component selected by ind->Index / ind->Swizzle is
+ * loaded and the constant rel_index is added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+				       const struct tgsi_ind_register *ind,
+				       int rel_index)
+{
+	struct gallivm_state *gallivm =
+		si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	LLVMValueRef addr_reg_ptr =
+		si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+	LLVMValueRef addr_reg_val =
+		LLVMBuildLoad(gallivm->builder, addr_reg_ptr, "");
+
+	return LLVMBuildAdd(gallivm->builder, addr_reg_val,
+			    lp_build_const_int32(gallivm, rel_index), "");
+}
+
+/**
+ * Calculate a dword address given an input or output register and a stride.
+ *
+ * The register is described by \p src if it is non-NULL, otherwise by
+ * \p dst (which must then be non-NULL, since it is dereferenced).
+ *
+ * \param dst               destination register, used only when src is NULL
+ * \param src               source register, or NULL
+ * \param vertex_dw_stride  multiplied by the dimension (vertex) index of
+ *                          2-dimensional registers; not used otherwise
+ *                          (callers in this file pass NULL in that case)
+ * \param base_addr         address that all offsets are added to
+ * \return base_addr plus the vertex offset, the indirect offset and the
+ *         offset of the register's unique I/O index; NULL (after an
+ *         assert) if the register file is neither INPUT nor OUTPUT
+ */
+static LLVMValueRef get_dw_address(struct si_shader_context *si_shader_ctx,
+ const struct tgsi_full_dst_register *dst,
+ const struct tgsi_full_src_register *src,
+ LLVMValueRef vertex_dw_stride,
+ LLVMValueRef base_addr)
+{
+ struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+ struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+ ubyte *name, *index, *array_first;
+ int first, param;
+ struct tgsi_full_dst_register reg;
+
+ /* Set the register description. The address computation is the same
+ * for sources and destinations. Only the fields read below are copied
+ * from a source register. */
+ if (src) {
+ reg.Register.File = src->Register.File;
+ reg.Register.Index = src->Register.Index;
+ reg.Register.Indirect = src->Register.Indirect;
+ reg.Register.Dimension = src->Register.Dimension;
+ reg.Indirect = src->Indirect;
+ reg.Dimension = src->Dimension;
+ reg.DimIndirect = src->DimIndirect;
+ } else
+ reg = *dst;
+
+ /* If the register is 2-dimensional (e.g. an array of vertices
+ * in a primitive), calculate the base address of the vertex. */
+ if (reg.Register.Dimension) {
+ /* NOTE(review): this local shadows the "ubyte *index" declared
+ * at function scope. */
+ LLVMValueRef index;
+
+ if (reg.Dimension.Indirect)
+ index = get_indirect_index(si_shader_ctx, &reg.DimIndirect,
+ reg.Dimension.Index);
+ else
+ index = lp_build_const_int32(gallivm, reg.Dimension.Index);
+
+ base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+ LLVMBuildMul(gallivm->builder, index,
+ vertex_dw_stride, ""), "");
+ }
+
+ /* Get information about the register. */
+ if (reg.Register.File == TGSI_FILE_INPUT) {
+ name = info->input_semantic_name;
+ index = info->input_semantic_index;
+ array_first = info->input_array_first;
+ } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
+ name = info->output_semantic_name;
+ index = info->output_semantic_index;
+ array_first = info->output_array_first;
+ } else {
+ assert(0);
+ return NULL;
+ }
+
+ if (reg.Register.Indirect) {
+ /* Add the relative address of the element. */
+ LLVMValueRef ind_index;
+
+ /* With an ArrayID the indirect index is taken relative to the
+ * first register of that array; without one it is relative to
+ * this register itself (constant part 0). */
+ if (reg.Indirect.ArrayID)
+ first = array_first[reg.Indirect.ArrayID];
+ else
+ first = reg.Register.Index;
+
+ ind_index = get_indirect_index(si_shader_ctx, &reg.Indirect,
+ reg.Register.Index - first);
+
+ /* Consecutive registers are 4 dwords apart. */
+ base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+ LLVMBuildMul(gallivm->builder, ind_index,
+ lp_build_const_int32(gallivm, 4), ""), "");
+
+ param = si_shader_io_get_unique_index(name[first], index[first]);
+ } else {
+ param = si_shader_io_get_unique_index(name[reg.Register.Index],
+ index[reg.Register.Index]);
+ }
+
+ /* Add the base address of the element. */
+ return LLVMBuildAdd(gallivm->builder, base_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+}
+
+/**
+ * Read from LDS.
+ *
+ * \param type		TGSI type the loaded dword(s) are bitcast to
+ * \param swizzle	channel offset added to dw_addr (typically 0..3);
+ *			~0 loads all TGSI_NUM_CHANNELS channels and returns
+ *			them gathered into one vector
+ * \param dw_addr	address in dwords
+ */
+static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
+			     enum tgsi_opcode_type type, unsigned swizzle,
+			     LLVMValueRef dw_addr)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+	if (swizzle != ~0) {
+		/* Single channel: offset the address and load one dword. */
+		LLVMValueRef chan_addr, dword;
+
+		chan_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+					 lp_build_const_int32(gallivm, swizzle));
+		dword = build_indexed_load(si_shader_ctx, si_shader_ctx->lds,
+					   chan_addr);
+
+		return LLVMBuildBitCast(gallivm->builder, dword,
+					tgsi2llvmtype(bld_base, type), "");
+	}
+
+	/* Whole vector: load every channel separately, then combine. */
+	LLVMValueRef channels[TGSI_NUM_CHANNELS];
+	unsigned c;
+
+	for (c = 0; c < TGSI_NUM_CHANNELS; c++)
+		channels[c] = lds_load(bld_base, type, c, dw_addr);
+
+	return lp_build_gather_values(gallivm, channels, TGSI_NUM_CHANNELS);
+}
+
+/**
+ * Write one dword to LDS.
+ *
+ * \param swizzle	channel offset added to dw_addr (typically 0..3)
+ * \param dw_addr	address in dwords
+ * \param value		value to store; it is bitcast to i32 first
+ */
+static void lds_store(struct lp_build_tgsi_context * bld_base,
+		      unsigned swizzle, LLVMValueRef dw_addr,
+		      LLVMValueRef value)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef chan_addr, dword;
+
+	chan_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+				 lp_build_const_int32(gallivm, swizzle));
+	dword = LLVMBuildBitCast(gallivm->builder, value,
+				 LLVMInt32TypeInContext(gallivm->context), "");
+
+	build_indexed_store(si_shader_ctx, si_shader_ctx->lds, chan_addr, dword);
+}
+
+/**
+ * Fetch a TCS input from LDS.
+ *
+ * The address is built from the input offset of the current patch and the
+ * vertex stride unpacked from SI_PARAM_TCS_IN_LAYOUT (13, 8).
+ */
+static LLVMValueRef fetch_input_tcs(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef vertex_stride =
+		unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+	LLVMValueRef patch_base =
+		get_tcs_in_current_patch_offset(si_shader_ctx);
+	LLVMValueRef addr =
+		get_dw_address(si_shader_ctx, NULL, reg, vertex_stride, patch_base);
+
+	return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Fetch (read back) a TCS output.
+ *
+ * Directly addressed TESSOUTER/TESSINNER outputs are read from the local
+ * "output" temporaries through radeon_llvm_emit_fetch. Everything else is
+ * loaded from LDS: registers with a dimension from the per-vertex outputs of
+ * the current patch, the others from its per-patch data.
+ */
+static LLVMValueRef fetch_output_tcs(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+	unsigned semantic = info->output_semantic_name[reg->Register.Index];
+	LLVMValueRef addr;
+
+	/* Tessellation levels live in the local temp "output" registers. */
+	if (!reg->Register.Indirect &&
+	    (semantic == TGSI_SEMANTIC_TESSOUTER ||
+	     semantic == TGSI_SEMANTIC_TESSINNER))
+		return radeon_llvm_emit_fetch(bld_base, reg, type, swizzle);
+
+	if (!reg->Register.Dimension) {
+		/* Per-patch output. */
+		addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, addr);
+	} else {
+		/* Per-vertex output. */
+		LLVMValueRef vertex_stride =
+			unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+
+		addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, vertex_stride, addr);
+	}
+
+	return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Fetch a TES input from LDS.
+ *
+ * Registers with a dimension are read from the per-vertex TCS outputs of the
+ * current patch, the others from its per-patch data.
+ */
+static LLVMValueRef fetch_input_tes(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef addr;
+
+	if (!reg->Register.Dimension) {
+		/* Per-patch input. */
+		addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, addr);
+	} else {
+		/* Per-vertex input. */
+		LLVMValueRef vertex_stride =
+			unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+
+		addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, vertex_stride, addr);
+	}
+
+	return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Store hook for tessellation control shaders (installed as
+ * bld_base->emit_store for TGSI_PROCESSOR_TESS_CTRL).
+ *
+ * Stores to anything but the OUTPUT file, and stores whose first value is
+ * still an LLVM vector, are forwarded unchanged to radeon_llvm_emit_store.
+ * Scalar stores to outputs are written to LDS: registers with a dimension
+ * go to the per-vertex outputs of the current patch, the others to its
+ * per-patch data. Directly addressed TESSINNER/TESSOUTER outputs are
+ * additionally stored through radeon_llvm_emit_store.
+ *
+ * \param inst  instruction whose first destination (Dst[0]) is stored
+ * \param info  opcode info, only passed on to radeon_llvm_emit_store
+ * \param dst   per-channel values to store
+ */
+static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst,
+ const struct tgsi_opcode_info * info,
+ LLVMValueRef dst[4])
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader *shader = si_shader_ctx->shader;
+ struct tgsi_shader_info *sinfo = &shader->selector->info;
+ const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+ unsigned chan_index;
+ LLVMValueRef dw_addr, stride;
+
+ /* Only handle per-patch and per-vertex outputs here.
+ * Vectors will be lowered to scalars and this function will be called again.
+ */
+ if (reg->Register.File != TGSI_FILE_OUTPUT ||
+ (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
+ radeon_llvm_emit_store(bld_base, inst, info, dst);
+ return;
+ }
+
+ /* Write tessellation levels to "output" temp registers.
+ * Also write them to LDS as per-patch outputs (below).
+ */
+ if (!reg->Register.Indirect &&
+ (sinfo->output_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_TESSINNER ||
+ sinfo->output_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_TESSOUTER))
+ radeon_llvm_emit_store(bld_base, inst, info, dst);
+
+ /* Compute the LDS address of the destination register. */
+ if (reg->Register.Dimension) {
+ stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+ dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, reg, NULL, stride, dw_addr);
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = get_dw_address(si_shader_ctx, reg, NULL, NULL, dw_addr);
+ }
+
+ /* Store each channel, saturating first if the instruction asks for it. */
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+ LLVMValueRef value = dst[chan_index];
+
+ if (inst->Instruction.Saturate)
+ value = radeon_llvm_saturate(bld_base, value);
+
+ lds_store(bld_base, chan_index, dw_addr, value);
+ }
+}
+
static LLVMValueRef fetch_input_gs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
@@ -398,7 +819,7 @@ static LLVMValueRef fetch_input_gs(
args[1] = vtx_offset;
args[2] = lp_build_const_int32(gallivm,
(get_param_index(semantic_name, semantic_index,
- shader->selector->gs_used_inputs) * 4 +
+ shader->selector->inputs_read) * 4 +
swizzle) * 256);
args[3] = uint->zero;
args[4] = uint->one; /* OFFEN */
@@ -616,6 +1037,7 @@ static void declare_system_value(
{
struct si_shader_context *si_shader_ctx =
si_shader_context(&radeon_bld->soa.bld_base);
+ struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMValueRef value = 0;
@@ -645,8 +1067,13 @@ static void declare_system_value(
break;
case TGSI_SEMANTIC_INVOCATIONID:
- value = LLVMGetParam(radeon_bld->main_fn,
- SI_PARAM_GS_INSTANCE_ID);
+ if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+ value = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+ else if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY)
+ value = LLVMGetParam(radeon_bld->main_fn,
+ SI_PARAM_GS_INSTANCE_ID);
+ else
+ assert(!"INVOCATIONID not implemented");
break;
case TGSI_SEMANTIC_SAMPLEID:
@@ -683,6 +1110,48 @@ static void declare_system_value(
value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
break;
+ case TGSI_SEMANTIC_TESSCOORD:
+ {
+ LLVMValueRef coord[4] = {
+ LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_u),
+ LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_v),
+ bld->zero,
+ bld->zero
+ };
+
+ /* For triangles, the vector should be (u, v, 1-u-v). */
+ if (si_shader_ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
+ PIPE_PRIM_TRIANGLES)
+ coord[2] = lp_build_sub(bld, bld->one,
+ lp_build_add(bld, coord[0], coord[1]));
+
+ value = lp_build_gather_values(gallivm, coord, 4);
+ break;
+ }
+
+ case TGSI_SEMANTIC_VERTICESIN:
+ value = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
+ break;
+
+ case TGSI_SEMANTIC_TESSINNER:
+ case TGSI_SEMANTIC_TESSOUTER:
+ {
+ LLVMValueRef dw_addr;
+ int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
+
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = LLVMBuildAdd(gallivm->builder, dw_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+
+ value = lds_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
+ ~0, dw_addr);
+ break;
+ }
+
+ case TGSI_SEMANTIC_PRIMID:
+ value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
+ break;
+
default:
assert(!"unknown system value");
return;
@@ -1338,6 +1807,134 @@ handle_semantic:
}
}
+/**
+ * Write one set of tessellation factors (outer or inner) of the current
+ * patch to the buffer at SI_RING_TESS_FACTOR in the RW buffer list.
+ *
+ * The number of outer/inner components and the per-patch stride (in dwords)
+ * depend on shader->key.tcs.prim_mode. Outer factors are stored with a
+ * constant offset of 0 and inner factors with a constant offset of
+ * outer_comps * 4, both on top of rel_patch_id * 4 * stride and
+ * SI_PARAM_TESS_FACTOR_OFFSET.
+ *
+ * \param name     TGSI_SEMANTIC_TESSOUTER or TGSI_SEMANTIC_TESSINNER;
+ *                 any other value asserts and is ignored
+ * \param out_ptr  the four per-channel pointers of the output; every one is
+ *                 loaded and bitcast to the uint element type
+ */
+static void si_write_tess_factors(struct si_shader_context *si_shader_ctx,
+				  unsigned name, LLVMValueRef *out_ptr)
+{
+	struct si_shader *shader = si_shader_ctx->shader;
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
+	LLVMValueRef output, out[4];
+	unsigned stride, outer_comps, inner_comps, i;
+
+	if (name != TGSI_SEMANTIC_TESSOUTER &&
+	    name != TGSI_SEMANTIC_TESSINNER) {
+		assert(0);
+		return;
+	}
+
+	switch (shader->key.tcs.prim_mode) {
+	case PIPE_PRIM_LINES:
+		stride = 2;
+		outer_comps = 2;
+		inner_comps = 0;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		stride = 4;
+		outer_comps = 3;
+		inner_comps = 1;
+		break;
+	case PIPE_PRIM_QUADS:
+		stride = 6;
+		outer_comps = 4;
+		inner_comps = 2;
+		break;
+	default:
+		/* Unknown primitive mode. Bail out: when assert() is compiled
+		 * out, falling through would use stride/outer_comps/inner_comps
+		 * uninitialized. */
+		assert(0);
+		return;
+	}
+
+	/* Load the outputs as i32. */
+	for (i = 0; i < 4; i++)
+		out[i] = LLVMBuildBitCast(gallivm->builder,
+				LLVMBuildLoad(gallivm->builder, out_ptr[i], ""),
+				bld_base->uint_bld.elem_type, "");
+
+	/* Convert the outputs to vectors. */
+	if (name == TGSI_SEMANTIC_TESSOUTER)
+		output = lp_build_gather_values(gallivm, out,
+						util_next_power_of_two(outer_comps));
+	else if (inner_comps > 1)
+		output = lp_build_gather_values(gallivm, out, inner_comps);
+	else if (inner_comps == 1)
+		output = out[0];
+	else
+		return; /* no inner factors for this primitive mode */
+
+	/* Get the buffer. */
+	rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				  SI_PARAM_RW_BUFFERS);
+	buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
+			lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));
+
+	/* Get offsets. */
+	tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+			       SI_PARAM_TESS_FACTOR_OFFSET);
+	rel_patch_id = get_rel_patch_id(si_shader_ctx);
+	byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
+				  lp_build_const_int32(gallivm, 4 * stride), "");
+
+	/* Store the output. */
+	if (name == TGSI_SEMANTIC_TESSOUTER) {
+		build_tbuffer_store_dwords(si_shader_ctx, buffer, output,
+					   outer_comps, byteoffset, tf_base, 0);
+	} else if (inner_comps) {
+		build_tbuffer_store_dwords(si_shader_ctx, buffer, output,
+					   inner_comps, byteoffset, tf_base,
+					   outer_comps * 4);
+	}
+}
+
+/**
+ * Epilogue of a vertex shader compiled as LS: store all four channels of
+ * every output to LDS, where the next shader (TCS aka HS) reads its inputs.
+ *
+ * The address of an output is
+ *   param_rel_auto_id * vertex stride + unique I/O index * 4
+ * with the vertex stride unpacked from SI_PARAM_LS_OUT_LAYOUT (13, 8).
+ */
+static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef rel_vertex = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+					       si_shader_ctx->param_rel_auto_id);
+	LLVMValueRef vertex_stride =
+		unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+	LLVMValueRef vertex_base =
+		LLVMBuildMul(builder, rel_vertex, vertex_stride, "");
+	unsigned out, c;
+
+	for (out = 0; out < info->num_outputs; out++) {
+		LLVMValueRef *chan_ptrs = si_shader_ctx->radeon_bld.soa.outputs[out];
+		unsigned sem_name = info->output_semantic_name[out];
+		unsigned sem_index = info->output_semantic_index[out];
+		int slot = si_shader_io_get_unique_index(sem_name, sem_index);
+		LLVMValueRef slot_addr =
+			LLVMBuildAdd(builder, vertex_base,
+				     lp_build_const_int32(gallivm, slot * 4), "");
+
+		for (c = 0; c < 4; c++) {
+			LLVMValueRef chan_value =
+				LLVMBuildLoad(builder, chan_ptrs[c], "");
+
+			lds_store(bld_base, c, slot_addr, chan_value);
+		}
+	}
+}
+
+/**
+ * Epilogue of a tessellation control shader.
+ *
+ * Only the tessellation factors (TESSINNER/TESSOUTER outputs) are written
+ * here; all other outputs were already stored to LDS by the instructions
+ * that wrote them.
+ */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+	unsigned out;
+
+	for (out = 0; out < info->num_outputs; out++) {
+		unsigned semantic = info->output_semantic_name[out];
+
+		if (semantic != TGSI_SEMANTIC_TESSINNER &&
+		    semantic != TGSI_SEMANTIC_TESSOUTER)
+			continue;
+
+		si_write_tess_factors(si_shader_ctx, semantic,
+				      si_shader_ctx->radeon_bld.soa.outputs[out]);
+	}
+}
+
static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
@@ -1347,6 +1944,9 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
si_shader_ctx->param_es2gs_offset);
+ uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
+ es->key.tes.es_enabled_outputs :
+ es->key.vs.es_enabled_outputs;
unsigned chan;
int i;
@@ -1361,7 +1961,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
param_index = get_param_index(info->output_semantic_name[i],
info->output_semantic_index[i],
- es->key.vs.gs_used_inputs);
+ enabled_outputs);
if (param_index < 0)
continue;
@@ -2201,19 +2801,19 @@ static void si_llvm_emit_ddxy(
indices[0] = bld_base->uint_bld.zero;
indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
NULL, 0, LLVMReadNoneAttribute);
- store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
lp_build_const_int32(gallivm, 0xfffffffc), "");
- load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
lp_build_const_int32(gallivm,
opcode == TGSI_OPCODE_DDX ? 1 : 2),
"");
- load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
for (c = 0; c < 4; ++c) {
@@ -2432,6 +3032,9 @@ static void create_function(struct si_shader_context *si_shader_ctx)
if (shader->key.vs.as_es) {
params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+ } else if (shader->key.vs.as_ls) {
+ params[SI_PARAM_LS_OUT_LAYOUT] = i32;
+ num_params = SI_PARAM_LS_OUT_LAYOUT+1;
} else {
if (shader->is_gs_copy_shader) {
last_array_pointer = SI_PARAM_CONST;
@@ -2447,11 +3050,44 @@ static void create_function(struct si_shader_context *si_shader_ctx)
/* VGPRs */
params[si_shader_ctx->param_vertex_id = num_params++] = i32;
- params[num_params++] = i32; /* unused*/
+ params[si_shader_ctx->param_rel_auto_id = num_params++] = i32;
params[num_params++] = i32; /* unused */
params[si_shader_ctx->param_instance_id = num_params++] = i32;
break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+ params[SI_PARAM_TCS_IN_LAYOUT] = i32;
+ params[SI_PARAM_TESS_FACTOR_OFFSET] = i32;
+ last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+
+ /* VGPRs */
+ params[SI_PARAM_PATCH_ID] = i32;
+ params[SI_PARAM_REL_IDS] = i32;
+ num_params = SI_PARAM_REL_IDS+1;
+ break;
+
+ case TGSI_PROCESSOR_TESS_EVAL:
+ params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+ num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
+
+ if (shader->key.tes.as_es) {
+ params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+ } else {
+ declare_streamout_params(si_shader_ctx, &shader->selector->so,
+ params, i32, &num_params);
+ }
+ last_sgpr = num_params - 1;
+
+ /* VGPRs */
+ params[si_shader_ctx->param_tes_u = num_params++] = f32;
+ params[si_shader_ctx->param_tes_v = num_params++] = f32;
+ params[si_shader_ctx->param_tes_rel_patch_id = num_params++] = i32;
+ params[si_shader_ctx->param_tes_patch_id = num_params++] = i32;
+ break;
+
case TGSI_PROCESSOR_GEOMETRY:
params[SI_PARAM_GS2VS_OFFSET] = i32;
params[SI_PARAM_GS_WAVE_ID] = i32;
@@ -2519,11 +3155,30 @@ static void create_function(struct si_shader_context *si_shader_ctx)
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
- si_shader_ctx->ddxy_lds =
+ si_shader_ctx->lds =
LLVMAddGlobalInAddressSpace(gallivm->module,
LLVMArrayType(i32, 64),
"ddxy_lds",
LOCAL_ADDR_SPACE);
+
+ if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
+ si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
+ si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
+ /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+ unsigned vertex_data_dw_size = 32*32*4;
+ unsigned patch_data_dw_size = 32*4;
+ /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+ unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+ unsigned lds_dwords = patch_dw_size;
+
+ /* The actual size is computed outside of the shader to reduce
+ * the number of shader variants. */
+ si_shader_ctx->lds =
+ LLVMAddGlobalInAddressSpace(gallivm->module,
+ LLVMArrayType(i32, lds_dwords),
+ "tess_lds",
+ LOCAL_ADDR_SPACE);
+ }
}
static void preload_constants(struct si_shader_context *si_shader_ctx)
@@ -2600,9 +3255,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
struct gallivm_state * gallivm = bld_base->base.gallivm;
unsigned i;
- if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
- si_shader_ctx->shader->key.vs.as_es ||
- !si_shader_ctx->shader->selector->so.num_outputs)
+ /* Streamout can only be used if the shader is compiled as VS. */
+ if (!si_shader_ctx->shader->selector->so.num_outputs ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+ (si_shader_ctx->shader->key.vs.as_es ||
+ si_shader_ctx->shader->key.vs.as_ls)) ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+ si_shader_ctx->shader->key.tes.as_es))
return;
LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -2633,6 +3292,8 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
si_shader_ctx->shader->key.vs.as_es) ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+ si_shader_ctx->shader->key.tes.as_es) ||
si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
@@ -2893,9 +3554,21 @@ static void si_dump_key(unsigned shader, union si_shader_key *key)
fprintf(stderr, "}\n");
if (key->vs.as_es)
- fprintf(stderr, " gs_used_inputs = 0x%"PRIx64"\n",
- key->vs.gs_used_inputs);
+ fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
+ key->vs.es_enabled_outputs);
fprintf(stderr, " as_es = %u\n", key->vs.as_es);
+ fprintf(stderr, " as_ls = %u\n", key->vs.as_ls);
+ break;
+
+ case PIPE_SHADER_TESS_CTRL:
+ fprintf(stderr, " prim_mode = %u\n", key->tcs.prim_mode);
+ break;
+
+ case PIPE_SHADER_TESS_EVAL:
+ if (key->tes.as_es)
+ fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
+ key->tes.es_enabled_outputs);
+ fprintf(stderr, " as_es = %u\n", key->tes.as_es);
break;
case PIPE_SHADER_GEOMETRY:
@@ -2995,11 +3668,25 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
switch (si_shader_ctx.type) {
case TGSI_PROCESSOR_VERTEX:
si_shader_ctx.radeon_bld.load_input = declare_input_vs;
- if (shader->key.vs.as_es) {
+ if (shader->key.vs.as_ls)
+ bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+ else if (shader->key.vs.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- } else {
+ else
+ bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+ bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
+ bld_base->emit_store = store_output_tcs;
+ bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+ break;
+ case TGSI_PROCESSOR_TESS_EVAL:
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
+ if (shader->key.tes.as_es)
+ bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
+ else
bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
- }
break;
case TGSI_PROCESSOR_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index be3e98a361a..2cba96f9587 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -26,6 +26,46 @@
* Christian König <[email protected]>
*/
+/* How linking tessellation shader inputs and outputs works.
+ *
+ * Inputs and outputs between shaders are stored in a buffer. This buffer
+ * lives in LDS (typical case for tessellation), but it can also live
+ * in memory. Each input or output has a fixed location within a vertex.
+ * The highest used input or output determines the stride between vertices.
+ *
+ * Since tessellation is only enabled in the OpenGL core profile,
+ * only these semantics are valid for per-vertex data:
+ *
+ * Name Location
+ *
+ * POSITION 0
+ * PSIZE 1
+ * CLIPDIST0..1 2..3
+ * CULLDIST0..1 (not implemented)
+ * GENERIC0..31 4..35
+ *
+ * For example, a shader only writing GENERIC0 has an output stride of 5.
+ *
+ * Only these semantics are valid for per-patch data:
+ *
+ * Name Location
+ *
+ * TESSOUTER 0
+ * TESSINNER 1
+ * PATCH0..29 2..31
+ *
+ * That's how independent shaders agree on input and output locations.
+ * The si_shader_io_get_unique_index function assigns the locations.
+ *
+ * Other required information for calculating the input and output addresses
+ * like the vertex stride, the patch stride, and the offsets where per-vertex
+ * and per-patch data start, is passed to the shader via user data SGPRs.
+ * The offsets and strides are calculated at draw time and aren't available
+ * at compile time.
+ *
+ * The same approach should be used for linking ES->GS in the future.
+ */
+
#ifndef SI_SHADER_H
#define SI_SHADER_H
@@ -43,9 +83,16 @@ struct radeon_shader_reloc;
#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
+#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
#define SI_VS_NUM_USER_SGPR 12
+#define SI_LS_NUM_USER_SGPR 13
+#define SI_TCS_NUM_USER_SGPR 11
+#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_GSCOPY_NUM_USER_SGPR 4
#define SI_PS_NUM_USER_SGPR 9
@@ -62,6 +109,31 @@ struct radeon_shader_reloc;
#define SI_PARAM_START_INSTANCE 6
/* the other VS parameters are assigned dynamically */
+/* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32 + 32*32
+ */
+#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+
+/* Layout of TCS outputs / TES inputs:
+ * [0:12] = stride between output patches in dwords = num_outputs * num_vertices * 4, max = 32*32*4
+ * [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
+ * [26:31] = gl_PatchVerticesIn, max = 32
+ */
+#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */
+
+/* Layout of LS outputs / TCS inputs
+ * [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
+ * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+ */
+#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
+
+/* TCS only parameters. */
+#define SI_PARAM_TESS_FACTOR_OFFSET 7
+#define SI_PARAM_PATCH_ID 8
+#define SI_PARAM_REL_IDS 9
+
/* GS only parameters */
#define SI_PARAM_GS2VS_OFFSET 4
#define SI_PARAM_GS_WAVE_ID 5
@@ -113,9 +185,24 @@ struct si_shader_selector {
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
- uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */
+
+ /* masks of "get_unique_index" bits */
+ uint64_t inputs_read;
+ uint64_t outputs_written;
+ uint32_t patch_outputs_written;
};
+/* Valid shader configurations:
+ *
+ * API shaders VS | TCS | TES | GS |pass| PS
+ * are compiled as: | | | |thru|
+ * | | | | |
+ * Only VS & PS: VS | -- | -- | -- | -- | PS
+ * With GS: ES | -- | -- | GS | VS | PS
+ * With Tessel.: LS | HS | VS | -- | -- | PS
+ * With both: LS | HS | ES | GS | VS | PS
+ */
+
union si_shader_key {
struct {
unsigned export_16bpc:8;
@@ -128,11 +215,23 @@ union si_shader_key {
} ps;
struct {
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
- /* The mask of "get_unique_index" bits, needed for ES,
- * it describes how the ES->GS ring buffer is laid out. */
- uint64_t gs_used_inputs;
- unsigned as_es:1;
+ /* Mask of "get_unique_index" bits - which outputs are read
+ * by the next stage (needed by ES).
+ * This describes how outputs are laid out in memory. */
+ uint64_t es_enabled_outputs;
+ unsigned as_es:1; /* export shader */
+ unsigned as_ls:1; /* local shader */
} vs;
+ struct {
+ unsigned prim_mode:3;
+ } tcs; /* tessellation control shader */
+ struct {
+ /* Mask of "get_unique_index" bits - which outputs are read
+ * by the next stage (needed by ES).
+ * This describes how outputs are laid out in memory. */
+ uint64_t es_enabled_outputs;
+ unsigned as_es:1; /* export shader */
+ } tes; /* tessellation evaluation shader */
};
struct si_shader {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6174dad7190..af001b39b3a 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -142,8 +142,9 @@ struct si_shader_data {
* Ring buffers: 0..1
* Streamout buffers: 2..5
*/
-#define SI_RING_ESGS 0
-#define SI_RING_GSVS 1
+#define SI_RING_TESS_FACTOR 0 /* for HS (TCS) */
+#define SI_RING_ESGS 0 /* for ES, GS */
+#define SI_RING_GSVS 1 /* for GS, VS */
#define SI_NUM_RING_BUFFERS 2
#define SI_SO_BUF_OFFSET SI_NUM_RING_BUFFERS
#define SI_NUM_RW_BUFFERS (SI_SO_BUF_OFFSET + 4)
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3eec217fbd2..2a0ff10285f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -112,7 +112,7 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
+ util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -351,9 +351,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->vs.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
- if (sctx->gs_shader) {
+ if (sctx->tes_shader)
+ key->vs.as_ls = 1;
+ else if (sctx->gs_shader) {
key->vs.as_es = 1;
- key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
+ key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
+ }
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ key->tcs.prim_mode =
+ sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (sctx->gs_shader) {
+ key->tes.as_es = 1;
+ key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
}
break;
case PIPE_SHADER_GEOMETRY:
@@ -487,10 +499,31 @@ static void *si_create_shader_state(struct pipe_context *ctx,
case TGSI_SEMANTIC_PRIMID:
break;
default:
- sel->gs_used_inputs |=
+ sel->inputs_read |=
1llu << si_shader_io_get_unique_index(name, index);
}
}
+ break;
+
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_TESS_CTRL:
+ for (i = 0; i < sel->info.num_outputs; i++) {
+ unsigned name = sel->info.output_semantic_name[i];
+ unsigned index = sel->info.output_semantic_index[i];
+
+ switch (name) {
+ case TGSI_SEMANTIC_TESSINNER:
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_PATCH:
+ sel->patch_outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ break;
+ default:
+ sel->outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ }
+ }
+ break;
}
if (sscreen->b.debug_flags & DBG_PRECOMPILE)