diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 151 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 28 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 12 |
3 files changed, 188 insertions, 3 deletions
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index bd68503f9c6..79cdd7c2cda 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3677,6 +3677,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx, memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4); memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2); + rctx->tess_state_dirty = true; /* makes evergreen_setup_tess_constants() re-upload the default levels */ } void evergreen_init_state_functions(struct r600_context *rctx) @@ -3770,3 +3771,153 @@ void evergreen_init_state_functions(struct r600_context *rctx) evergreen_init_compute_state_functions(rctx); } + +/** + * Calculates the LDS size for tessellation shaders (VS, TCS, TES) and + * stores the packed result in rctx->lds_alloc. + * + * The information about LDS and other non-compile-time parameters is then + * written to the const buffer. + * + * The const buffer is 16 dwords and contains, in order (sizes in bytes): + * uint32_t input_patch_size + * uint32_t input_vertex_size + * uint32_t num_tcs_input_cp + * uint32_t num_tcs_output_cp + * uint32_t output_patch_size + * uint32_t output_vertex_size + * uint32_t output_patch0_offset + * uint32_t perpatch_output_offset + * float tess_state[6] (4 default outer levels, then 2 default inner levels) + * uint32_t zero[2] + * and the same constbuf is bound to LS/HS/VS(ES); the code binds it at slot + * R600_LDS_INFO_CONST_BUFFER of the PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL + * and PIPE_SHADER_TESS_EVAL stages. + * + * When no tess eval shader is bound, lds_alloc is set to 0 and that slot is + * unbound on all three stages. When lds_alloc is non-zero and the LS, the TCS, + * the input control point count and the tess state are all unchanged since the + * last upload, the function returns without touching the bindings. + * + * rctx: context; its vs/tcs/tes selectors and tess_state are read, and + * lds_alloc, tess_state_dirty and the last_* fields are updated. + * info: draw info; only vertices_per_patch is read. + * num_patches: out parameter, always set to 1 here. + */ +void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches) +{ + struct pipe_constant_buffer constbuf = {0}; + struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? 
rctx->tcs_shader : rctx->tes_shader; /* tcs is the TES selector when no TCS is bound */ + struct r600_pipe_shader_selector *ls = rctx->vs_shader; + unsigned num_tcs_input_cp = info->vertices_per_patch; + unsigned num_tcs_outputs; + unsigned num_tcs_output_cp; + unsigned num_tcs_patch_outputs; + unsigned num_tcs_inputs; + unsigned input_vertex_size, output_vertex_size; + unsigned input_patch_size, pervertex_output_patch_size, output_patch_size; + unsigned output_patch0_offset, perpatch_output_offset, lds_size; + uint32_t values[16]; + unsigned num_waves; + unsigned num_pipes = rctx->screen->b.info.r600_max_pipes; + unsigned wave_divisor = (16 * num_pipes); + + *num_patches = 1; + + /* No tess eval shader: clear lds_alloc and unbind the LDS info buffer from all three stages. */ if (!rctx->tes_shader) { + rctx->lds_alloc = 0; + rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, + R600_LDS_INFO_CONST_BUFFER, NULL); + rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL, + R600_LDS_INFO_CONST_BUFFER, NULL); + rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, + R600_LDS_INFO_CONST_BUFFER, NULL); + return; + } + + /* Nothing that feeds the constants changed since the last upload: leave the bindings alone. */ if (rctx->lds_alloc != 0 && + rctx->last_ls == ls && + !rctx->tess_state_dirty && + rctx->last_num_tcs_input_cp == num_tcs_input_cp && + rctx->last_tcs == tcs) + return; + + num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask); + + if (rctx->tcs_shader) { + num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask); + num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; + num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask); + } else { + /* No TCS bound: per-vertex outputs and control point count mirror the inputs. */ num_tcs_outputs = num_tcs_inputs; + num_tcs_output_cp = num_tcs_input_cp; + num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */ + } + + /* size in bytes; every counted input/output slot takes 16 bytes */ + input_vertex_size = num_tcs_inputs * 16; + output_vertex_size = num_tcs_outputs * 16; + + input_patch_size = num_tcs_input_cp * input_vertex_size; + + pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; + output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + + 
/* With a TCS the outputs are placed after the input patches; without one they start at offset 0. */ output_patch0_offset = rctx->tcs_shader ? input_patch_size * *num_patches : 0; + perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; + + lds_size = output_patch0_offset + output_patch_size * *num_patches; + + /* dwords 0-7: LDS layout values */ values[0] = input_patch_size; + values[1] = input_vertex_size; + values[2] = num_tcs_input_cp; + values[3] = num_tcs_output_cp; + + values[4] = output_patch_size; + values[5] = output_vertex_size; + values[6] = output_patch0_offset; + values[7] = perpatch_output_offset; + + /* docs say HS_NUM_WAVES = CEIL((LS_HS_CONFIG.NUM_PATCHES * + LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */ + num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor); + + /* pack: LDS size in the low bits, wave count starting at bit 14 */ rctx->lds_alloc = (lds_size | (num_waves << 14)); + + /* dwords 8-13: default tess levels; dwords 14-15: zero */ memcpy(&values[8], rctx->tess_state, 6 * sizeof(float)); + values[14] = 0; + values[15] = 0; + + /* remember what this upload was built from, for the early return above */ rctx->tess_state_dirty = false; + rctx->last_ls = ls; + rctx->last_tcs = tcs; + rctx->last_num_tcs_input_cp = num_tcs_input_cp; + + constbuf.user_buffer = values; + constbuf.buffer_size = 16 * 4; /* 16 dwords */ + + rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, + R600_LDS_INFO_CONST_BUFFER, &constbuf); + rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL, + R600_LDS_INFO_CONST_BUFFER, &constbuf); + rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, + R600_LDS_INFO_CONST_BUFFER, &constbuf); + /* NOTE(review): constbuf.buffer is zero-initialized above and never assigned in this function; presumably set_constant_buffer() may store a resource there for user buffers and this drops it - confirm */ pipe_resource_reference(&constbuf.buffer, NULL); +} + /** Builds the value for the VGT_LS_HS_CONFIG register: 0 when no tess eval shader is bound, otherwise num_patches, the input control point count (info->vertices_per_patch) and the output control point count (TCS_VERTICES_OUT of the bound TCS, or the input count when no TCS is bound). */ +uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx, + const struct pipe_draw_info *info, + unsigned num_patches) +{ + unsigned num_output_cp; + + if (!rctx->tes_shader) + return 0; + + num_output_cp = rctx->tcs_shader ? 
+ rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : + info->vertices_per_patch; + + return S_028B58_NUM_PATCHES(num_patches) | + S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) | + S_028B58_HS_NUM_OUTPUT_CP(num_output_cp); +} + /** Writes ls_hs_config to the R_028B58_VGT_LS_HS_CONFIG context register on cs; rctx is not used. */ +void evergreen_set_ls_hs_config(struct r600_context *rctx, + struct radeon_winsys_cs *cs, + uint32_t ls_hs_config) +{ + radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); +} + /** Writes lds_alloc to the R_0288E8_SQ_LDS_ALLOC context register on cs; rctx is not used. */ +void evergreen_set_lds_alloc(struct r600_context *rctx, + struct radeon_winsys_cs *cs, + uint32_t lds_alloc) +{ + radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc); +} diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ac06d1f51b6..98dc6fc3d01 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -59,11 +59,11 @@ /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 -#define R600_MAX_DRAW_CS_DWORDS 52 +#define R600_MAX_DRAW_CS_DWORDS 58 /* NOTE(review): 6 more than before; presumably covers the two context register writes added to r600_draw_vbo - confirm */ #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 -#define R600_MAX_DRIVER_CONST_BUFFERS 2 +#define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) /* start driver buffers after user buffers */ @@ -71,7 +71,12 @@ #define R600_UCP_SIZE (4*4*8) #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE) -#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) +#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) /* slot for the tessellation LDS constants built by evergreen_setup_tess_constants() */ +/* + * Note GS doesn't use a constant buffer binding, just a resource index, + * so it's fine to have it exist at index 16. + * NOTE(review): with R600_MAX_USER_CONST_BUFFERS = 13 the define below + * evaluates to 15, i.e. the 16th slot counting from one - confirm wording. + */ +#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit * of 16 const buffers. * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id. 
@@ -525,6 +530,11 @@ struct r600_context { struct r600_isa *isa; float sample_positions[4 * 16]; float tess_state[8]; + bool tess_state_dirty; /* set by evergreen_set_tess_state(), cleared by evergreen_setup_tess_constants() after an upload */ + struct r600_pipe_shader_selector *last_ls; /* last_ls, last_tcs and last_num_tcs_input_cp record what the last LDS constant upload was built from */ + struct r600_pipe_shader_selector *last_tcs; + unsigned last_num_tcs_input_cp; + unsigned lds_alloc; /* value emitted to SQ_LDS_ALLOC at draw time; 0 when no tess eval shader is bound */ }; static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs, @@ -702,6 +712,18 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, uint64_t dst_offset, uint64_t src_offset, uint64_t size); /* tessellation LDS setup and register emission, defined in evergreen_state.c */ +void evergreen_setup_tess_constants(struct r600_context *rctx, + const struct pipe_draw_info *info, + unsigned *num_patches); +uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx, + const struct pipe_draw_info *info, + unsigned num_patches); +void evergreen_set_ls_hs_config(struct r600_context *rctx, + struct radeon_winsys_cs *cs, + uint32_t ls_hs_config); +void evergreen_set_lds_alloc(struct r600_context *rctx, + struct radeon_winsys_cs *cs, + uint32_t lds_alloc); /* r600_state_common.c */ void r600_init_common_state_functions(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index de90a99ac1a..7cc5adcb2b8 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1612,6 +1612,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info struct radeon_winsys_cs *cs = rctx->b.gfx.cs; bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off; uint64_t mask; + unsigned num_patches; /* written by evergreen_setup_tess_constants(); only written and read when chip_class >= EVERGREEN */ if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) { return; @@ -1717,6 +1718,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } + /* Refresh the tessellation LDS constants and rctx->lds_alloc before states are emitted. */ if (rctx->b.chip_class >= EVERGREEN) + evergreen_setup_tess_constants(rctx, &info, &num_patches); + /* Emit states. 
*/ r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE); r600_flush_emit(rctx); @@ -1750,6 +1754,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1)); } + /* Emit VGT_LS_HS_CONFIG and SQ_LDS_ALLOC; both values are 0 when no tess eval shader is bound. */ if (rctx->b.chip_class >= EVERGREEN) { + uint32_t ls_hs_config = evergreen_get_ls_hs_config(rctx, &info, + num_patches); + + evergreen_set_ls_hs_config(rctx, cs, ls_hs_config); + evergreen_set_lds_alloc(rctx, cs, rctx->lds_alloc); + } + /* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles, * even though it should have no effect on those. */ if (rctx->b.chip_class == R600 && rctx->rasterizer) { |