author    Dave Airlie <[email protected]>    2017-03-28 11:43:48 +1000
committer Dave Airlie <[email protected]>    2017-03-28 17:40:20 +1000
commit    92e9c14a6a8d536404ef5b41217662bb2286d946 (patch)
tree      f3c806f11ce09779e86f343be40f939b3ea45a54
parent    4b467c759ea1e9d5960a5e668a166f33ef03e9d6 (diff)
radv: move calculating fragment shader i/os to pipeline.
There is no need to calculate this on each command submit.

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
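The win is a precompute-and-replay pattern: the SPI_PS_INPUT_CNTL values depend only on the shaders bound to the pipeline, so they can be computed once in radv_pipeline_init() and replayed verbatim every time a command buffer emits fragment shader state. A minimal stand-alone C sketch of that pattern follows; fake_pipeline, emit_dword and the dummy input values are illustrative stubs, not the radv code itself:

    #include <stdint.h>
    #include <stdio.h>

    /* Values precomputed at "pipeline creation": one dword per PS input. */
    struct fake_pipeline {
        uint32_t ps_input_cntl[32];
        uint32_t ps_input_cntl_num;
    };

    /* Stub standing in for a command-stream write (radeon_emit-style). */
    static void emit_dword(uint32_t dw)
    {
        printf("emit 0x%08x\n", (unsigned)dw);
    }

    /* Expensive part: run once per pipeline, not once per command buffer. */
    static void pipeline_precompute(struct fake_pipeline *p)
    {
        p->ps_input_cntl_num = 0;
        for (unsigned i = 0; i < 4; i++)   /* pretend the PS has 4 inputs */
            p->ps_input_cntl[p->ps_input_cntl_num++] = 0x20 + i;
    }

    /* Cheap part: run on every emit, just replays the cached values. */
    static void cmd_buffer_emit(const struct fake_pipeline *p)
    {
        for (unsigned i = 0; i < p->ps_input_cntl_num; i++)
            emit_dword(p->ps_input_cntl[i]);
    }

    int main(void)
    {
        struct fake_pipeline p;
        pipeline_precompute(&p); /* once, at pipeline-creation time */
        cmd_buffer_emit(&p);     /* many times, at draw/record time */
        return 0;
    }

The draw-time path then reduces to a register-sequence header plus a tight copy loop, which is exactly what the new radeon_set_context_reg_seq()/radeon_emit() pair in radv_emit_fragment_shader() below does.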
-rw-r--r--  src/amd/vulkan/radv_cmd_buffer.c  67
-rw-r--r--  src/amd/vulkan/radv_pipeline.c    71
-rw-r--r--  src/amd/vulkan/radv_private.h      2
3 files changed, 77 insertions, 63 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c3b141ea3a6..92e68efa861 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -669,18 +669,13 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
{
struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radv_shader_variant *ps, *vs;
+ struct radv_shader_variant *ps;
uint64_t va;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
struct radv_blend_state *blend = &pipeline->graphics.blend;
- unsigned ps_offset = 0;
- struct ac_vs_output_info *outinfo;
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX];
-
- outinfo = &vs->info.vs.outinfo;
va = ws->buffer_get_va(ps->bo);
ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
@@ -716,63 +711,9 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
- if (ps->info.fs.has_pcoord) {
- unsigned val;
- val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
- radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
- ps_offset++;
- }
-
- if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
- unsigned vs_offset, flat_shade;
- unsigned val;
- vs_offset = outinfo->prim_id_output;
- flat_shade = true;
- val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
- radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
- ++ps_offset;
- }
-
- if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
- unsigned vs_offset, flat_shade;
- unsigned val;
- vs_offset = outinfo->layer_output;
- flat_shade = true;
- val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
- radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
- ++ps_offset;
- }
-
- for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
- unsigned vs_offset, flat_shade;
- unsigned val;
-
- if (!(ps->info.fs.input_mask & (1u << i)))
- continue;
-
-
- if (!(outinfo->export_mask & (1u << i))) {
- radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
- S_028644_OFFSET(0x20));
- ++ps_offset;
- continue;
- }
-
- vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
- if (outinfo->prim_id_output != 0xffffffff) {
- if (vs_offset >= outinfo->prim_id_output)
- vs_offset++;
- }
- if (outinfo->layer_output != 0xffffffff) {
- if (vs_offset >= outinfo->layer_output)
- vs_offset++;
- }
- flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
-
- val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
- radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
- ++ps_offset;
- }
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num);
+ for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++)
+ radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]);
}
static void
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 550b773e9a5..c7d74805a27 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1527,6 +1527,76 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
S_028A40_GS_WRITE_OPTIMIZE(1);
}
+static void calculate_ps_inputs(struct radv_pipeline *pipeline)
+{
+ struct radv_shader_variant *ps, *vs;
+ struct ac_vs_output_info *outinfo;
+
+ ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX];
+
+ outinfo = &vs->info.vs.outinfo;
+
+ unsigned ps_offset = 0;
+ if (ps->info.fs.has_pcoord) {
+ unsigned val;
+ val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
+ pipeline->graphics.ps_input_cntl[ps_offset] = val;
+ ps_offset++;
+ }
+
+ if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
+ unsigned vs_offset, flat_shade;
+ unsigned val;
+ vs_offset = outinfo->prim_id_output;
+ flat_shade = true;
+ val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+ pipeline->graphics.ps_input_cntl[ps_offset] = val;
+ ++ps_offset;
+ }
+
+ if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
+ unsigned vs_offset, flat_shade;
+ unsigned val;
+ vs_offset = outinfo->layer_output;
+ flat_shade = true;
+ val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+ pipeline->graphics.ps_input_cntl[ps_offset] = val;
+ ++ps_offset;
+ }
+
+ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
+ unsigned vs_offset, flat_shade;
+ unsigned val;
+
+ if (!(ps->info.fs.input_mask & (1u << i)))
+ continue;
+
+ if (!(outinfo->export_mask & (1u << i))) {
+ pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
+ ++ps_offset;
+ continue;
+ }
+
+ vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
+ if (outinfo->prim_id_output != 0xffffffff) {
+ if (vs_offset >= outinfo->prim_id_output)
+ vs_offset++;
+ }
+ if (outinfo->layer_output != 0xffffffff) {
+ if (vs_offset >= outinfo->layer_output)
+ vs_offset++;
+ }
+ flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
+
+ val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
+ pipeline->graphics.ps_input_cntl[ps_offset] = val;
+ ++ps_offset;
+ }
+
+ pipeline->graphics.ps_input_cntl_num = ps_offset;
+}
+
VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
struct radv_device *device,
@@ -1672,6 +1742,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
V_028710_SPI_SHADER_ZERO;
+ calculate_ps_inputs(pipeline);
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index dff0aef8328..bf3d19ce9e6 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -963,6 +963,8 @@ struct radv_pipeline {
bool prim_restart_enable;
unsigned esgs_ring_size;
unsigned gsvs_ring_size;
+ uint32_t ps_input_cntl[32];
+ uint32_t ps_input_cntl_num;
struct radv_prim_vertex_count prim_vertex_count;
} graphics;
};