From 731ff3766f0c4f0792ea518907d65f7b632d4053 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 30 Nov 2015 14:56:10 +1000
Subject: r600: create LDS info constants buffer and write LDS registers. (v2)

This creates a constant buffer with the information about
the layout of the LDS memory that is given to the vertex, tess
control and tess evaluation shaders.

This also programs the LDS size and the LS_HS_CONFIG registers,
on evergreen only.

v2: calculate lds hs num waves properly (Marek)
Emit the state only when something has changed (airlied).

Signed-off-by: Dave Airlie
---
 src/gallium/drivers/r600/evergreen_state.c   | 151 +++++++++++++++++++++++++++
 src/gallium/drivers/r600/r600_pipe.h         |  28 ++++-
 src/gallium/drivers/r600/r600_state_common.c |  12 +++
 3 files changed, 188 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index bd68503f9c6..79cdd7c2cda 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3677,6 +3677,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx,
 
 	memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
 	memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
+	rctx->tess_state_dirty = true;
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
@@ -3770,3 +3771,153 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 
 	evergreen_init_compute_state_functions(rctx);
 }
+
+/**
+ * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
+ *
+ * The information about LDS and other non-compile-time parameters is then
+ * written to the const buffer.
+
+ * const buffer contains -
+ * uint32_t input_patch_size
+ * uint32_t input_vertex_size
+ * uint32_t num_tcs_input_cp
+ * uint32_t num_tcs_output_cp;
+ * uint32_t output_patch_size
+ * uint32_t output_vertex_size
+ * uint32_t output_patch0_offset
+ * uint32_t perpatch_output_offset
+ * and the same constbuf is bound to LS/HS/VS(ES).
+ */
+void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
+{
+	struct pipe_constant_buffer constbuf = {0};
+	struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
+	struct r600_pipe_shader_selector *ls = rctx->vs_shader;
+	unsigned num_tcs_input_cp = info->vertices_per_patch;
+	unsigned num_tcs_outputs;
+	unsigned num_tcs_output_cp;
+	unsigned num_tcs_patch_outputs;
+	unsigned num_tcs_inputs;
+	unsigned input_vertex_size, output_vertex_size;
+	unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
+	unsigned output_patch0_offset, perpatch_output_offset, lds_size;
+	uint32_t values[16];
+	unsigned num_waves;
+	unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+	unsigned wave_divisor = (16 * num_pipes);
+
+	*num_patches = 1;
+
+	if (!rctx->tes_shader) {
+		rctx->lds_alloc = 0;
+		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+					      R600_LDS_INFO_CONST_BUFFER, NULL);
+		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
+					      R600_LDS_INFO_CONST_BUFFER, NULL);
+		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
+					      R600_LDS_INFO_CONST_BUFFER, NULL);
+		return;
+	}
+
+	if (rctx->lds_alloc != 0 &&
+	    rctx->last_ls == ls &&
+	    !rctx->tess_state_dirty &&
+	    rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
+	    rctx->last_tcs == tcs)
+		return;
+
+	num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask);
+
+	if (rctx->tcs_shader) {
+		num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask);
+		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask);
+	} else {
+		num_tcs_outputs = num_tcs_inputs;
+		num_tcs_output_cp = num_tcs_input_cp;
+		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+	}
+
+	/* size in bytes */
+	input_vertex_size = num_tcs_inputs * 16;
+	output_vertex_size = num_tcs_outputs * 16;
+
+	input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+	output_patch0_offset = rctx->tcs_shader ? input_patch_size * *num_patches : 0;
+	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
+
+	lds_size = output_patch0_offset + output_patch_size * *num_patches;
+
+	values[0] = input_patch_size;
+	values[1] = input_vertex_size;
+	values[2] = num_tcs_input_cp;
+	values[3] = num_tcs_output_cp;
+
+	values[4] = output_patch_size;
+	values[5] = output_vertex_size;
+	values[6] = output_patch0_offset;
+	values[7] = perpatch_output_offset;
+
+	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
+	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
+	num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor);
+
+	rctx->lds_alloc = (lds_size | (num_waves << 14));
+
+	memcpy(&values[8], rctx->tess_state, 6 * sizeof(float));
+	values[14] = 0;
+	values[15] = 0;
+
+	rctx->tess_state_dirty = false;
+	rctx->last_ls = ls;
+	rctx->last_tcs = tcs;
+	rctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
+	constbuf.user_buffer = values;
+	constbuf.buffer_size = 16 * 4;
+
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
+				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
+				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+	pipe_resource_reference(&constbuf.buffer, NULL);
+}
+
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches)
+{
+	unsigned num_output_cp;
+
+	if (!rctx->tes_shader)
+		return 0;
+
+	num_output_cp = rctx->tcs_shader ?
+		rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+		info->vertices_per_patch;
+
+	return S_028B58_NUM_PATCHES(num_patches) |
+		S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+		S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
+}
+
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+				struct radeon_winsys_cs *cs,
+				uint32_t ls_hs_config)
+{
+	radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+}
+
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+			     struct radeon_winsys_cs *cs,
+			     uint32_t lds_alloc)
+{
+	radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ac06d1f51b6..98dc6fc3d01 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -59,11 +59,11 @@
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS	16
-#define R600_MAX_DRAW_CS_DWORDS		52
+#define R600_MAX_DRAW_CS_DWORDS		58
 #define R600_TRACE_CS_DWORDS		7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
@@ -71,7 +71,12 @@
 #define R600_UCP_SIZE (4*4*8)
 #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
 
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+/*
+ * Note GS doesn't use a constant buffer binding, just a resource index,
+ * so it's fine to have it exist at index 16.
+ */
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -525,6 +530,11 @@ struct r600_context {
 	struct r600_isa		*isa;
 	float sample_positions[4 * 16];
 	float tess_state[8];
+	bool tess_state_dirty;
+	struct r600_pipe_shader_selector *last_ls;
+	struct r600_pipe_shader_selector *last_tcs;
+	unsigned last_num_tcs_input_cp;
+	unsigned lds_alloc;
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -702,6 +712,18 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 			       uint64_t dst_offset,
 			       uint64_t src_offset,
 			       uint64_t size);
+void evergreen_setup_tess_constants(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned *num_patches);
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches);
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+				struct radeon_winsys_cs *cs,
+				uint32_t ls_hs_config);
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+			     struct radeon_winsys_cs *cs,
+			     uint32_t lds_alloc);
 
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index de90a99ac1a..7cc5adcb2b8 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1612,6 +1612,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
 	uint64_t mask;
+	unsigned num_patches;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
 		return;
@@ -1717,6 +1718,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	}
 
+	if (rctx->b.chip_class >= EVERGREEN)
+		evergreen_setup_tess_constants(rctx, &info, &num_patches);
+
 	/* Emit states. */
 	r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
 	r600_flush_emit(rctx);
@@ -1750,6 +1754,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				       S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
 	}
 
+	if (rctx->b.chip_class >= EVERGREEN) {
+		uint32_t ls_hs_config = evergreen_get_ls_hs_config(rctx, &info,
+								   num_patches);
+
+		evergreen_set_ls_hs_config(rctx, cs, ls_hs_config);
+		evergreen_set_lds_alloc(rctx, cs, rctx->lds_alloc);
+	}
+
 	/* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
 	 * even though it should have no effect on those. */
 	if (rctx->b.chip_class == R600 && rctx->rasterizer) {
-- 
cgit v1.2.3