summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 54
-rw-r--r-- src/amd/common/ac_nir_to_llvm.h 30
-rw-r--r-- src/amd/vulkan/radv_cmd_buffer.c 64
-rw-r--r-- src/amd/vulkan/radv_pipeline.c 8
4 files changed, 86 insertions, 70 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6e36c192c3c..cfbdeae1a3b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4228,11 +4228,11 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
int length = glsl_get_length(variable->type);
if (idx == VARYING_SLOT_CLIP_DIST0) {
if (ctx->stage == MESA_SHADER_VERTEX)
- ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
+ ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1;
ctx->num_output_clips = length;
} else if (idx == VARYING_SLOT_CULL_DIST0) {
if (ctx->stage == MESA_SHADER_VERTEX)
- ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
+ ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1;
ctx->num_output_culls = length;
}
if (length > 4)
@@ -4448,7 +4448,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
}
static void
-handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
+handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
+ struct ac_vs_output_info *outinfo)
{
uint32_t param_count = 0;
unsigned target;
@@ -4461,14 +4462,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
(1ull << VARYING_SLOT_CULL_DIST0) |
(1ull << VARYING_SLOT_CULL_DIST1));
- ctx->shader_info->vs.prim_id_output = 0xffffffff;
- ctx->shader_info->vs.layer_output = 0xffffffff;
+ outinfo->prim_id_output = 0xffffffff;
+ outinfo->layer_output = 0xffffffff;
if (clip_mask) {
LLVMValueRef slots[8];
unsigned j;
- if (ctx->shader_info->vs.cull_dist_mask)
- ctx->shader_info->vs.cull_dist_mask <<= ctx->num_output_clips;
+ if (outinfo->cull_dist_mask)
+ outinfo->cull_dist_mask <<= ctx->num_output_clips;
i = VARYING_SLOT_CLIP_DIST0;
for (j = 0; j < ctx->num_output_clips; j++)
@@ -4513,25 +4514,25 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
i == VARYING_SLOT_CULL_DIST1) {
continue;
} else if (i == VARYING_SLOT_PSIZ) {
- ctx->shader_info->vs.writes_pointsize = true;
+ outinfo->writes_pointsize = true;
psize_value = values[0];
continue;
} else if (i == VARYING_SLOT_LAYER) {
- ctx->shader_info->vs.writes_layer = true;
+ outinfo->writes_layer = true;
layer_value = values[0];
- ctx->shader_info->vs.layer_output = param_count;
+ outinfo->layer_output = param_count;
target = V_008DFC_SQ_EXP_PARAM + param_count;
param_count++;
} else if (i == VARYING_SLOT_VIEWPORT) {
- ctx->shader_info->vs.writes_viewport_index = true;
+ outinfo->writes_viewport_index = true;
viewport_index_value = values[0];
continue;
} else if (i == VARYING_SLOT_PRIMITIVE_ID) {
- ctx->shader_info->vs.prim_id_output = param_count;
+ outinfo->prim_id_output = param_count;
target = V_008DFC_SQ_EXP_PARAM + param_count;
param_count++;
} else if (i >= VARYING_SLOT_VAR0) {
- ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
+ outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
target = V_008DFC_SQ_EXP_PARAM + param_count;
param_count++;
}
@@ -4560,9 +4561,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
pos_args[0].out[3] = ctx->f32one; /* W */
}
- uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) |
- (ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
- (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0));
+ uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
+ (outinfo->writes_layer == true ? 4 : 0) |
+ (outinfo->writes_viewport_index == true ? 8 : 0));
if (mask) {
pos_args[1].enabled_channels = mask;
pos_args[1].valid_mask = 0;
@@ -4574,11 +4575,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
pos_args[1].out[2] = ctx->f32zero; /* Z */
pos_args[1].out[3] = ctx->f32zero; /* W */
- if (ctx->shader_info->vs.writes_pointsize == true)
+ if (outinfo->writes_pointsize == true)
pos_args[1].out[0] = psize_value;
- if (ctx->shader_info->vs.writes_layer == true)
+ if (outinfo->writes_layer == true)
pos_args[1].out[2] = layer_value;
- if (ctx->shader_info->vs.writes_viewport_index == true)
+ if (outinfo->writes_viewport_index == true)
pos_args[1].out[3] = viewport_index_value;
}
for (i = 0; i < 4; i++) {
@@ -4598,12 +4599,13 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
ac_build_export(&ctx->ac, &pos_args[i]);
}
- ctx->shader_info->vs.pos_exports = num_pos_exports;
- ctx->shader_info->vs.param_exports = param_count;
+ outinfo->pos_exports = num_pos_exports;
+ outinfo->param_exports = param_count;
}
static void
-handle_es_outputs_post(struct nir_to_llvm_context *ctx)
+handle_es_outputs_post(struct nir_to_llvm_context *ctx,
+ struct ac_es_output_info *outinfo)
{
int j;
uint64_t max_output_written = 0;
@@ -4638,7 +4640,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
1, 1, true, true);
}
}
- ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;
+ outinfo->esgs_itemsize = (max_output_written + 1) * 16;
}
static void
@@ -4761,9 +4763,9 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
switch (ctx->stage) {
case MESA_SHADER_VERTEX:
if (ctx->options->key.vs.as_es)
- handle_es_outputs_post(ctx);
+ handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
else
- handle_vs_outputs_post(ctx);
+ handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
break;
case MESA_SHADER_FRAGMENT:
handle_fs_outputs_post(ctx);
@@ -5170,7 +5172,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
}
idx += slot_inc;
}
- handle_vs_outputs_post(ctx);
+ handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
}
void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index b4c4a73a1c1..15afbd77456 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -91,6 +91,23 @@ struct ac_userdata_locations {
struct ac_userdata_info shader_data[AC_UD_MAX_UD];
};
+struct ac_vs_output_info {
+ uint8_t clip_dist_mask;
+ uint8_t cull_dist_mask;
+ bool writes_pointsize;
+ bool writes_layer;
+ bool writes_viewport_index;
+ uint32_t prim_id_output;
+ uint32_t layer_output;
+ uint32_t export_mask;
+ unsigned param_exports;
+ unsigned pos_exports;
+};
+
+struct ac_es_output_info {
+ uint32_t esgs_itemsize;
+};
+
struct ac_shader_variant_info {
struct ac_userdata_locations user_sgprs_locs;
unsigned num_user_sgprs;
@@ -98,19 +115,10 @@ struct ac_shader_variant_info {
unsigned num_input_vgprs;
union {
struct {
- unsigned param_exports;
- unsigned pos_exports;
+ struct ac_vs_output_info outinfo;
+ struct ac_es_output_info es_info;
unsigned vgpr_comp_cnt;
- uint32_t export_mask;
- bool writes_pointsize;
- bool writes_layer;
- bool writes_viewport_index;
bool as_es;
- uint8_t clip_dist_mask;
- uint8_t cull_dist_mask;
- uint32_t esgs_itemsize;
- uint32_t prim_id_output;
- uint32_t layer_output;
} vs;
struct {
unsigned num_interp;
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index eb2a7b0ddea..ce34204b8ac 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -502,7 +502,8 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
static void
radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader)
+ struct radv_shader_variant *shader,
+ struct ac_vs_output_info *outinfo)
{
struct radeon_winsys *ws = cmd_buffer->device->ws;
uint64_t va = ws->buffer_get_va(shader->bo);
@@ -510,19 +511,19 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
- export_count = MAX2(1, shader->info.vs.param_exports);
+ export_count = MAX2(1, outinfo->param_exports);
radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(export_count - 1));
radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(shader->info.vs.pos_exports > 1 ?
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(shader->info.vs.pos_exports > 2 ?
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(shader->info.vs.pos_exports > 3 ?
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE));
@@ -540,17 +541,17 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
unsigned clip_dist_mask, cull_dist_mask, total_mask;
- clip_dist_mask = shader->info.vs.clip_dist_mask;
- cull_dist_mask = shader->info.vs.cull_dist_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
- S_02881C_USE_VTX_POINT_SIZE(shader->info.vs.writes_pointsize) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(shader->info.vs.writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(shader->info.vs.writes_viewport_index) |
- S_02881C_VS_OUT_MISC_VEC_ENA(shader->info.vs.writes_pointsize ||
- shader->info.vs.writes_layer ||
- shader->info.vs.writes_viewport_index) |
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(outinfo->writes_pointsize ||
+ outinfo->writes_layer ||
+ outinfo->writes_viewport_index) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
pipeline->graphics.raster.pa_cl_vs_out_cntl |
@@ -558,12 +559,13 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
clip_dist_mask);
radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
- S_028AB4_REUSE_OFF(shader->info.vs.writes_viewport_index));
+ S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
}
static void
radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader)
+ struct radv_shader_variant *shader,
+ struct ac_es_output_info *outinfo)
{
struct radeon_winsys *ws = cmd_buffer->device->ws;
uint64_t va = ws->buffer_get_va(shader->bo);
@@ -571,7 +573,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- shader->info.vs.esgs_itemsize / 4);
+ outinfo->esgs_itemsize / 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, va >> 40);
@@ -590,9 +592,9 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
vs = pipeline->shaders[MESA_SHADER_VERTEX];
if (vs->info.vs.as_es)
- radv_emit_hw_es(cmd_buffer, vs);
+ radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
else
- radv_emit_hw_vs(cmd_buffer, pipeline, vs);
+ radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
}
@@ -666,7 +668,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, gs->rsrc1);
radeon_emit(cmd_buffer->cs, gs->rsrc2);
- radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader);
+ radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
AC_UD_GS_VS_RING_STRIDE_ENTRIES);
@@ -696,10 +698,14 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_blend_state *blend = &pipeline->graphics.blend;
unsigned ps_offset = 0;
unsigned z_order;
+ struct ac_vs_output_info *outinfo;
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX];
+
+ outinfo = &vs->info.vs.outinfo;
+
va = ws->buffer_get_va(ps->bo);
ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
@@ -757,20 +763,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
ps_offset++;
}
- if (ps->info.fs.prim_id_input && (vs->info.vs.prim_id_output != 0xffffffff)) {
+ if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
unsigned vs_offset, flat_shade;
unsigned val;
- vs_offset = vs->info.vs.prim_id_output;
+ vs_offset = outinfo->prim_id_output;
flat_shade = true;
val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
++ps_offset;
}
- if (ps->info.fs.layer_input && (vs->info.vs.layer_output != 0xffffffff)) {
+ if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
unsigned vs_offset, flat_shade;
unsigned val;
- vs_offset = vs->info.vs.layer_output;
+ vs_offset = outinfo->layer_output;
flat_shade = true;
val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
@@ -785,20 +791,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
continue;
- if (!(vs->info.vs.export_mask & (1u << i))) {
+ if (!(outinfo->export_mask & (1u << i))) {
radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
S_028644_OFFSET(0x20));
++ps_offset;
continue;
}
- vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1));
- if (vs->info.vs.prim_id_output != 0xffffffff) {
- if (vs_offset >= vs->info.vs.prim_id_output)
+ vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
+ if (outinfo->prim_id_output != 0xffffffff) {
+ if (vs_offset >= outinfo->prim_id_output)
vs_offset++;
}
- if (vs->info.vs.layer_output != 0xffffffff) {
- if (vs_offset >= vs->info.vs.layer_output)
+ if (outinfo->layer_output != 0xffffffff) {
+ if (vs_offset >= outinfo->layer_output)
vs_offset++;
}
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 1becb65055b..2c710f4eb8f 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1467,15 +1467,15 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
unsigned alignment = 256 * num_se;
/* The maximum size is 63.999 MB per SE. */
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
-
+ struct ac_es_output_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
- struct ac_shader_variant_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info;
+
/* Calculate the minimum size. */
- unsigned min_esgs_ring_size = align(es_info->vs.esgs_itemsize * gs_vertex_reuse *
+ unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
wave_size, alignment);
/* These are recommended sizes, not minimum sizes. */
unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
- es_info->vs.esgs_itemsize * gs_info->gs.vertices_in;
+ es_info->esgs_itemsize * gs_info->gs.vertices_in;
unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1);