summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h3
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c211
3 files changed, 203 insertions, 13 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 374021817cd..b52714d5abe 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -264,6 +264,9 @@ struct si_shader {
unsigned nr_param_exports;
bool is_gs_copy_shader;
bool dx10_clamp_mode; /* convert NaNs to 0 */
+
+ unsigned ls_rsrc1;
+ unsigned ls_rsrc2;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index af001b39b3a..b17f2f09012 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -102,6 +102,8 @@ union si_state {
struct si_pm4_state *fb_blend;
struct si_pm4_state *dsa_stencil_ref;
struct si_pm4_state *ta_bordercolor_base;
+ struct si_pm4_state *ls;
+ struct si_pm4_state *hs;
struct si_pm4_state *es;
struct si_pm4_state *gs;
struct si_pm4_state *gs_rings;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2a0ff10285f..937e71de723 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -33,6 +33,129 @@
#include "util/u_memory.h"
#include "util/u_simple_shaders.h"
+static void si_set_tesseval_regs(struct si_shader *shader,
+ struct si_pm4_state *pm4)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+ unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
+ bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
+ bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
+ unsigned type, partitioning, topology;
+
+ switch (tes_prim_mode) {
+ case PIPE_PRIM_LINES:
+ type = V_028B6C_TESS_ISOLINE;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ type = V_028B6C_TESS_TRIANGLE;
+ break;
+ case PIPE_PRIM_QUADS:
+ type = V_028B6C_TESS_QUAD;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ switch (tes_spacing) {
+ case PIPE_TESS_SPACING_FRACTIONAL_ODD:
+ partitioning = V_028B6C_PART_FRAC_ODD;
+ break;
+ case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
+ partitioning = V_028B6C_PART_FRAC_EVEN;
+ break;
+ case PIPE_TESS_SPACING_EQUAL:
+ partitioning = V_028B6C_PART_INTEGER;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (tes_point_mode)
+ topology = V_028B6C_OUTPUT_POINT;
+ else if (tes_prim_mode == PIPE_PRIM_LINES)
+ topology = V_028B6C_OUTPUT_LINE;
+ else if (tes_vertex_order_cw)
+ /* for some reason, this must be the other way around */
+ topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+ else
+ topology = V_028B6C_OUTPUT_TRIANGLE_CW;
+
+ si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
+ S_028B6C_TYPE(type) |
+ S_028B6C_PARTITIONING(partitioning) |
+ S_028B6C_TOPOLOGY(topology));
+}
+
+static void si_shader_ls(struct si_shader *shader)
+{
+ struct si_pm4_state *pm4;
+ unsigned num_sgprs, num_user_sgprs;
+ unsigned vgpr_comp_cnt;
+ uint64_t va;
+
+ pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+ if (pm4 == NULL)
+ return;
+
+ va = shader->bo->gpu_address;
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+ /* We need at least 2 components for LS.
+ * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
+
+ num_user_sgprs = SI_LS_NUM_USER_SGPR;
+ num_sgprs = shader->num_sgprs;
+ if (num_user_sgprs > num_sgprs) {
+ /* Last 2 reserved SGPRs are used for VCC */
+ num_sgprs = num_user_sgprs + 2;
+ }
+ assert(num_sgprs <= 104);
+
+ si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+ si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
+
+ shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B528_SGPRS((num_sgprs - 1) / 8) |
+ S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
+ shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs);
+}
+
+static void si_shader_hs(struct si_shader *shader)
+{
+ struct si_pm4_state *pm4;
+ unsigned num_sgprs, num_user_sgprs;
+ uint64_t va;
+
+ pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+ if (pm4 == NULL)
+ return;
+
+ va = shader->bo->gpu_address;
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+ num_user_sgprs = SI_TCS_NUM_USER_SGPR;
+ num_sgprs = shader->num_sgprs;
+ /* One SGPR after user SGPRs is pre-loaded with tessellation factor
+ * buffer offset. */
+ if ((num_user_sgprs + 1) > num_sgprs) {
+ /* Last 2 reserved SGPRs are used for VCC */
+ num_sgprs = num_user_sgprs + 1 + 2;
+ }
+ assert(num_sgprs <= 104);
+
+ si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
+ si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+ si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
+ S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
+ S_00B428_SGPRS((num_sgprs - 1) / 8));
+ si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+ S_00B42C_USER_SGPR(num_user_sgprs));
+}
+
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
@@ -48,9 +171,15 @@ static void si_shader_es(struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ if (shader->selector->type == PIPE_SHADER_VERTEX) {
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+ vgpr_comp_cnt = 3; /* all components are needed for TES */
+ num_user_sgprs = SI_TES_NUM_USER_SGPR;
+ } else
+ assert(0);
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
num_sgprs = shader->num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
if ((num_user_sgprs + 1) > num_sgprs) {
@@ -69,6 +198,9 @@ static void si_shader_es(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
S_00B32C_USER_SGPR(num_user_sgprs) |
S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+
+ if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(shader, pm4);
}
static void si_shader_gs(struct si_shader *shader)
@@ -169,6 +301,9 @@ static void si_shader_vs(struct si_shader *shader)
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+ vgpr_comp_cnt = 3; /* all components are needed for TES */
+ num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
assert(0);
@@ -220,6 +355,9 @@ static void si_shader_vs(struct si_shader *shader)
S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(shader, pm4);
}
static void si_shader_ps(struct si_shader *shader)
@@ -317,7 +455,18 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
- if (shader->key.vs.as_es)
+ if (shader->key.vs.as_ls)
+ si_shader_ls(shader);
+ else if (shader->key.vs.as_es)
+ si_shader_es(shader);
+ else
+ si_shader_vs(shader);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_shader_hs(shader);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (shader->key.tes.as_es)
si_shader_es(shader);
else
si_shader_vs(shader);
@@ -657,7 +806,18 @@ static void si_delete_shader_selector(struct pipe_context *ctx,
c = p->next_variant;
switch (sel->type) {
case PIPE_SHADER_VERTEX:
- if (p->key.vs.as_es)
+ if (p->key.vs.as_ls)
+ si_pm4_delete_state(sctx, ls, p->pm4);
+ else if (p->key.vs.as_es)
+ si_pm4_delete_state(sctx, es, p->pm4);
+ else
+ si_pm4_delete_state(sctx, vs, p->pm4);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_pm4_delete_state(sctx, hs, p->pm4);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (p->key.tes.as_es)
si_pm4_delete_state(sctx, es, p->pm4);
else
si_pm4_delete_state(sctx, vs, p->pm4);
@@ -995,16 +1155,46 @@ void si_update_shaders(struct si_context *sctx)
struct pipe_context *ctx = (struct pipe_context*)sctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ /* Update stages before GS. */
+ if (sctx->tes_shader) {
+ /* VS as LS */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+
+ if (sctx->tcs_shader) {
+ si_shader_select(ctx, sctx->tcs_shader);
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+ } else {
+ assert(!"generate TCS shader");
+ }
+
+ si_shader_select(ctx, sctx->tes_shader);
+ if (sctx->gs_shader) {
+ /* TES as ES */
+ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ } else {
+ /* TES as VS */
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ sctx->b.streamout.stride_in_dw = sctx->tes_shader->so.stride;
+ }
+ } else if (sctx->gs_shader) {
+ /* VS as ES */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ } else {
+ /* VS as VS */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+ sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
+ }
+
+ /* Update GS. */
if (sctx->gs_shader) {
si_shader_select(ctx, sctx->gs_shader);
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
-
sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;
- si_shader_select(ctx, sctx->vs_shader);
- si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
-
if (!sctx->gs_rings)
si_init_gs_rings(sctx);
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
@@ -1017,11 +1207,6 @@ void si_update_shaders(struct si_context *sctx)
sctx->gs_shader->info.num_outputs * 16,
64, true, true, 4, 16);
} else {
- si_shader_select(ctx, sctx->vs_shader);
- si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
-
- sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
-
si_pm4_bind_state(sctx, gs_rings, NULL);
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);