summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c 151
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 28
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 12
3 files changed, 188 insertions, 3 deletions
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index bd68503f9c6..79cdd7c2cda 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3677,6 +3677,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx,
memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
+ rctx->tess_state_dirty = true;
}
void evergreen_init_state_functions(struct r600_context *rctx)
@@ -3770,3 +3771,153 @@ void evergreen_init_state_functions(struct r600_context *rctx)
evergreen_init_compute_state_functions(rctx);
}
+
+/*
+ * Bind the LDS info constant buffer on the vertex, tess-control and
+ * tess-eval stages. Passing cb == NULL unbinds it from all three.
+ */
+static void evergreen_bind_lds_info_constbuf(struct r600_context *rctx,
+					     struct pipe_constant_buffer *cb)
+{
+	static const unsigned stages[] = {
+		PIPE_SHADER_VERTEX,
+		PIPE_SHADER_TESS_CTRL,
+		PIPE_SHADER_TESS_EVAL,
+	};
+	unsigned i;
+
+	for (i = 0; i < sizeof(stages) / sizeof(stages[0]); i++)
+		rctx->b.b.set_constant_buffer(&rctx->b.b, stages[i],
+					      R600_LDS_INFO_CONST_BUFFER, cb);
+}
+
+/**
+ * Work out the LDS layout shared by the tessellation stages (VS, TCS, TES)
+ * and hand the run-time parameters to the shaders through a constant buffer.
+ *
+ * The buffer holds 16 dwords and is bound at R600_LDS_INFO_CONST_BUFFER on
+ * the vertex, tess-control and tess-eval stages:
+ *
+ *   [0]      input_patch_size
+ *   [1]      input_vertex_size
+ *   [2]      num_tcs_input_cp
+ *   [3]      num_tcs_output_cp
+ *   [4]      output_patch_size
+ *   [5]      output_vertex_size
+ *   [6]      output_patch0_offset
+ *   [7]      perpatch_output_offset
+ *   [8..13]  the first six floats of rctx->tess_state
+ *   [14..15] zero
+ *
+ * All sizes and offsets are in bytes; every written output slot accounts
+ * for 16 bytes.
+ *
+ * When no TES is bound, rctx->lds_alloc is cleared and the buffer is unbound
+ * from the three stages. When a TES is bound without a TCS, the TES selector
+ * stands in for the TCS: the output counts mirror the inputs, two per-patch
+ * outputs (TESSINNER + TESSOUTER) are assumed and the output area starts at
+ * offset 0.
+ *
+ * The upload is skipped when the VS selector, the TCS/TES selector, the
+ * input control point count and the tess state are all unchanged since the
+ * last upload and rctx->lds_alloc is non-zero.
+ *
+ * \param rctx         context; lds_alloc and the last_* / tess_state_dirty
+ *                     tracking fields are updated
+ * \param info         draw info; only vertices_per_patch is read
+ * \param num_patches  out: number of patches, always set to 1
+ */
+void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
+{
+	struct r600_pipe_shader_selector *vs = rctx->vs_shader;
+	struct r600_pipe_shader_selector *real_tcs = rctx->tcs_shader;
+	/* Without a TCS the TES selector is tracked in its place. */
+	struct r600_pipe_shader_selector *hs = real_tcs ? real_tcs : rctx->tes_shader;
+	unsigned in_cp = info->vertices_per_patch;
+	unsigned pipes = rctx->screen->b.info.r600_max_pipes;
+	unsigned cp_per_wave_group = 16 * pipes;
+	struct pipe_constant_buffer cb = {0};
+	uint32_t lds_info[16];
+	unsigned patches;
+	unsigned in_slots, out_slots, patch_slots, out_cp;
+	unsigned in_vertex_bytes, out_vertex_bytes;
+	unsigned in_patch_bytes, out_vertices_bytes, out_patch_bytes;
+	unsigned out_base, patch_out_base, lds_bytes;
+	unsigned waves;
+	bool stale;
+
+	*num_patches = 1;
+
+	if (!rctx->tes_shader) {
+		/* Tessellation is off: no LDS needed, drop the info buffer. */
+		rctx->lds_alloc = 0;
+		evergreen_bind_lds_info_constbuf(rctx, NULL);
+		return;
+	}
+
+	/* Re-upload only if one of the inputs to the layout has changed. */
+	stale = rctx->lds_alloc == 0 ||
+		rctx->tess_state_dirty ||
+		rctx->last_ls != vs ||
+		rctx->last_tcs != hs ||
+		rctx->last_num_tcs_input_cp != in_cp;
+	if (!stale)
+		return;
+
+	patches = *num_patches;
+
+	in_slots = util_last_bit64(vs->lds_outputs_written_mask);
+	if (real_tcs) {
+		out_slots = util_last_bit64(hs->lds_outputs_written_mask);
+		out_cp = hs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		patch_slots = util_last_bit64(hs->lds_patch_outputs_written_mask);
+	} else {
+		out_slots = in_slots;
+		out_cp = in_cp;
+		patch_slots = 2; /* TESSINNER + TESSOUTER */
+	}
+
+	/* Everything below is measured in bytes. */
+	in_vertex_bytes = in_slots * 16;
+	in_patch_bytes = in_cp * in_vertex_bytes;
+
+	out_vertex_bytes = out_slots * 16;
+	out_vertices_bytes = out_cp * out_vertex_bytes;
+	out_patch_bytes = out_vertices_bytes + patch_slots * 16;
+
+	/* With a real TCS the outputs are placed after the input patches. */
+	out_base = real_tcs ? in_patch_bytes * patches : 0;
+	patch_out_base = out_base + out_vertices_bytes;
+	lds_bytes = out_base + out_patch_bytes * patches;
+
+	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
+	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
+	waves = ceilf((float)(patches * out_cp) / (float)cp_per_wave_group);
+
+	/* Size in the low bits, wave count from bit 14 up. */
+	rctx->lds_alloc = lds_bytes | (waves << 14);
+
+	lds_info[0] = in_patch_bytes;
+	lds_info[1] = in_vertex_bytes;
+	lds_info[2] = in_cp;
+	lds_info[3] = out_cp;
+	lds_info[4] = out_patch_bytes;
+	lds_info[5] = out_vertex_bytes;
+	lds_info[6] = out_base;
+	lds_info[7] = patch_out_base;
+	memcpy(&lds_info[8], rctx->tess_state, 6 * sizeof(float));
+	lds_info[14] = 0;
+	lds_info[15] = 0;
+
+	/* Remember what this upload was derived from. */
+	rctx->last_ls = vs;
+	rctx->last_tcs = hs;
+	rctx->last_num_tcs_input_cp = in_cp;
+	rctx->tess_state_dirty = false;
+
+	cb.user_buffer = lds_info;
+	cb.buffer_size = sizeof(lds_info);
+
+	evergreen_bind_lds_info_constbuf(rctx, &cb);
+	pipe_resource_reference(&cb.buffer, NULL);
+}
+
+/**
+ * Build the VGT_LS_HS_CONFIG value for a draw.
+ *
+ * \param rctx         context; the bound TES/TCS selectors are consulted
+ * \param info         draw info; vertices_per_patch is the input control
+ *                     point count
+ * \param num_patches  patch count to encode
+ * \return 0 when no TES is bound, otherwise the packed NUM_PATCHES,
+ *         HS_NUM_INPUT_CP and HS_NUM_OUTPUT_CP fields
+ */
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches)
+{
+	uint32_t config = 0;
+
+	if (rctx->tes_shader) {
+		unsigned out_cp;
+
+		/* Without a TCS the patch passes through unchanged, so the
+		 * output control point count equals the input count. */
+		if (rctx->tcs_shader)
+			out_cp = rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		else
+			out_cp = info->vertices_per_patch;
+
+		config = S_028B58_NUM_PATCHES(num_patches) |
+			 S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+			 S_028B58_HS_NUM_OUTPUT_CP(out_cp);
+	}
+
+	return config;
+}
+
+/**
+ * Write \p ls_hs_config to the VGT_LS_HS_CONFIG context register through
+ * command stream \p cs. \p rctx is not used.
+ */
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+				struct radeon_winsys_cs *cs,
+				uint32_t ls_hs_config)
+{
+	const unsigned reg = R_028B58_VGT_LS_HS_CONFIG;
+
+	radeon_set_context_reg(cs, reg, ls_hs_config);
+}
+
+/**
+ * Write \p lds_alloc to the SQ_LDS_ALLOC context register through command
+ * stream \p cs. \p rctx is not used.
+ */
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+			     struct radeon_winsys_cs *cs,
+			     uint32_t lds_alloc)
+{
+	const unsigned reg = R_0288E8_SQ_LDS_ALLOC;
+
+	radeon_set_context_reg(cs, reg, lds_alloc);
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ac06d1f51b6..98dc6fc3d01 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -59,11 +59,11 @@
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
-#define R600_MAX_DRAW_CS_DWORDS 52
+#define R600_MAX_DRAW_CS_DWORDS 58
#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
@@ -71,7 +71,12 @@
#define R600_UCP_SIZE (4*4*8)
#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+/*
+ * Note GS doesn't use a constant buffer binding, just a resource index,
+ * so it's fine to have it exist at index 16.
+ */
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
* of 16 const buffers.
* UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -525,6 +530,11 @@ struct r600_context {
struct r600_isa *isa;
float sample_positions[4 * 16];
float tess_state[8];
+ bool tess_state_dirty;
+ struct r600_pipe_shader_selector *last_ls;
+ struct r600_pipe_shader_selector *last_tcs;
+ unsigned last_num_tcs_input_cp;
+ unsigned lds_alloc;
};
static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -702,6 +712,18 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
uint64_t dst_offset,
uint64_t src_offset,
uint64_t size);
+void evergreen_setup_tess_constants(struct r600_context *rctx,
+ const struct pipe_draw_info *info,
+ unsigned *num_patches);
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+ const struct pipe_draw_info *info,
+ unsigned num_patches);
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+ struct radeon_winsys_cs *cs,
+ uint32_t ls_hs_config);
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+ struct radeon_winsys_cs *cs,
+ uint32_t lds_alloc);
/* r600_state_common.c */
void r600_init_common_state_functions(struct r600_context *rctx);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index de90a99ac1a..7cc5adcb2b8 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1612,6 +1612,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
uint64_t mask;
+ unsigned num_patches;
if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
return;
@@ -1717,6 +1718,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
+ if (rctx->b.chip_class >= EVERGREEN)
+ evergreen_setup_tess_constants(rctx, &info, &num_patches);
+
/* Emit states. */
r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
r600_flush_emit(rctx);
@@ -1750,6 +1754,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
}
+ if (rctx->b.chip_class >= EVERGREEN) {
+ uint32_t ls_hs_config = evergreen_get_ls_hs_config(rctx, &info,
+ num_patches);
+
+ evergreen_set_ls_hs_config(rctx, cs, ls_hs_config);
+ evergreen_set_lds_alloc(rctx, cs, rctx->lds_alloc);
+ }
+
/* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
* even though it should have no effect on those. */
if (rctx->b.chip_class == R600 && rctx->rasterizer) {