diff options
author | Marek Olšák <[email protected]> | 2015-08-30 14:13:10 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2015-09-01 21:51:15 +0200 |
commit | a9971e85d9a4038645bdc7496d73906fc324b805 (patch) | |
tree | f2258bd2f71b042892c97f659c4066ded6fde143 /src | |
parent | 5e2619ef3078fe4f9c3e0780ee520fbfb727ee54 (diff) |
radeonsi: rework uploading border colors
The border colors are uploaded only once when the state is created.
This brings truly immutable sampler descriptors, because they don't have
to be updated every time a sampler state is re-bound.
It also moves the TA_BC_BASE_ADDR registers to init_config, removing one
more state. The catch is there is now a limit: only 4096 unique border colors
can be used by one context. I don't think that will be a problem.
Reviewed-by: Alex Deucher <[email protected]>
Acked-by: Christian König <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 23 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 124 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 3 |
5 files changed, 75 insertions, 92 deletions
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 3041da621c3..92a7068e715 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -273,13 +273,17 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); } -void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, - unsigned start, unsigned count, void **states) +static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, + unsigned start, unsigned count, void **states) { + struct si_context *sctx = (struct si_context *)ctx; struct si_sampler_states *samplers = &sctx->samplers[shader].states; struct si_sampler_state **sstates = (struct si_sampler_state**)states; int i; + if (!count || shader >= SI_NUM_SHADERS) + return; + if (start == 0) samplers->saved_states[0] = states[0]; if (start == 1) @@ -1022,6 +1026,7 @@ void si_init_all_descriptors(struct si_context *sctx) 4, SI_NUM_VERTEX_BUFFERS); /* Set pipe_context functions. 
*/ + sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.set_constant_buffer = si_set_constant_buffer; sctx->b.b.set_sampler_views = si_set_sampler_views; sctx->b.b.set_stream_output_targets = si_set_streamout_targets; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index da774789525..d68ea5fb31d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -44,7 +44,8 @@ static void si_destroy_context(struct pipe_context *context) pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->tf_ring, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); - r600_resource_reference(&sctx->border_color_table, NULL); + r600_resource_reference(&sctx->border_color_buffer, NULL); + free(sctx->border_color_table); r600_resource_reference(&sctx->scratch_buffer, NULL); sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL); @@ -139,6 +140,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sscreen->b.trace_bo->cs_buf : NULL); sctx->b.rings.gfx.flush = si_context_gfx_flush; + /* Border colors. 
*/ + sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * + sizeof(*sctx->border_color_table)); + if (!sctx->border_color_table) + goto fail; + + sctx->border_color_buffer = (struct r600_resource*) + pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, + SI_MAX_BORDER_COLORS * + sizeof(*sctx->border_color_table)); + if (!sctx->border_color_buffer) + goto fail; + + sctx->border_color_map = + ws->buffer_map(sctx->border_color_buffer->cs_buf, + NULL, PIPE_TRANSFER_WRITE); + if (!sctx->border_color_map) + goto fail; + si_init_all_descriptors(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); @@ -197,6 +217,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, return &sctx->b.b; fail: + fprintf(stderr, "radeonsi: Failed to create a context.\n"); si_destroy_context(&sctx->b.b); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 02d75f00f98..847853e59e9 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -79,6 +79,7 @@ #define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) #define SI_MAX_VIEWPORTS 16 +#define SI_MAX_BORDER_COLORS 4096 struct si_compute; @@ -103,7 +104,6 @@ struct si_sampler_view { struct si_sampler_state { uint32_t val[4]; - uint32_t border_color[4]; }; struct si_cs_shader_state { @@ -219,8 +219,10 @@ struct si_context { struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tf_ring; - struct r600_resource *border_color_table; - unsigned border_color_offset; + union pipe_color_union *border_color_table; /* in CPU memory, any endian */ + struct r600_resource *border_color_buffer; + union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ + unsigned border_color_count; /* Vertex and index buffers. 
*/ bool vertex_buffers_dirty; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 52fa8fec033..e31895d6933 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2701,9 +2701,10 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st static void *si_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { + struct si_context *sctx = (struct si_context *)ctx; struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; - unsigned border_color_type; + unsigned border_color_type, border_color_index = 0; if (rstate == NULL) { return NULL; @@ -2726,9 +2727,38 @@ static void *si_create_sampler_state(struct pipe_context *ctx, state->border_color.f[2] == 1 && state->border_color.f[3] == 1) border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; - else + else { + int i; + border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; + /* Check if the border has been uploaded already. */ + for (i = 0; i < sctx->border_color_count; i++) + if (memcmp(&sctx->border_color_table[i], &state->border_color, + sizeof(state->border_color)) == 0) + break; + + if (i >= SI_MAX_BORDER_COLORS) { + /* Getting 4096 unique border colors is very unlikely. */ + fprintf(stderr, "radeonsi: The border color table is full. " + "Any new border colors will be just black. " + "Please file a bug.\n"); + border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; + } else { + if (i == sctx->border_color_count) { + /* Upload a new border color. 
*/ + memcpy(&sctx->border_color_table[i], &state->border_color, + sizeof(state->border_color)); + util_memcpy_cpu_to_le32(&sctx->border_color_map[i], + &state->border_color, + sizeof(state->border_color)); + sctx->border_color_count++; + } + + border_color_index = i; + } + } + rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | @@ -2742,89 +2772,11 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); - rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); - - if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { - memcpy(rstate->border_color, state->border_color.ui, - sizeof(rstate->border_color)); - } - + rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | + S_008F3C_BORDER_COLOR_TYPE(border_color_type); return rstate; } -/* Upload border colors and update the pointers in resource descriptors. - * There can only be 4096 border colors per context. - * - * XXX: This is broken if the buffer gets reallocated. 
- */ -static void si_set_border_colors(struct si_context *sctx, unsigned count, - void **states) -{ - struct si_sampler_state **rstates = (struct si_sampler_state **)states; - uint32_t *border_color_table = NULL; - int i, j; - - for (i = 0; i < count; i++) { - if (rstates[i] && - G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == - V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { - if (!sctx->border_color_table || - ((sctx->border_color_offset + count - i) & - C_008F3C_BORDER_COLOR_PTR)) { - r600_resource_reference(&sctx->border_color_table, NULL); - sctx->border_color_offset = 0; - - sctx->border_color_table = - si_resource_create_custom(&sctx->screen->b.b, - PIPE_USAGE_DYNAMIC, - 4096 * 4 * 4); - } - - if (!border_color_table) { - border_color_table = - sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, - sctx->b.rings.gfx.cs, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED); - } - - for (j = 0; j < 4; j++) { - border_color_table[4 * sctx->border_color_offset + j] = - util_le32_to_cpu(rstates[i]->border_color[j]); - } - - rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; - rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); - } - } - - if (border_color_table) { - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - - uint64_t va_offset = sctx->border_color_table->gpu_address; - - si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); - if (sctx->b.chip_class >= CIK) - si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); - si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); - si_pm4_set_state(sctx, ta_bordercolor_base, pm4); - } -} - -static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, - unsigned start, unsigned count, - void **states) -{ - struct si_context *sctx = (struct si_context *)ctx; - - if (!count || shader >= SI_NUM_SHADERS) - return; - - si_set_border_colors(sctx, count, states); - si_set_sampler_descriptors(sctx, shader, start, 
count, states); -} - static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) { struct si_context *sctx = (struct si_context *)ctx; @@ -3105,7 +3057,6 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.get_sample_position = cayman_get_sample_position; sctx->b.b.create_sampler_state = si_create_sampler_state; - sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.delete_sampler_state = si_delete_sampler_state; sctx->b.b.create_sampler_view = si_create_sampler_view; @@ -3270,6 +3221,7 @@ static void si_init_config(struct si_context *sctx) unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; unsigned raster_config, raster_config_1; + uint64_t border_color_va = sctx->border_color_buffer->gpu_address; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); int i; @@ -3434,5 +3386,11 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); } + si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); + if (sctx->b.chip_class >= CIK) + si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); + si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_DATA); + sctx->init_config = pm4; } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 49f9f65bc14..f5726f0c5de 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -91,7 +91,6 @@ union si_state { struct si_state_rasterizer *rasterizer; struct si_state_dsa *dsa; struct si_pm4_state *poly_offset; - struct si_pm4_state *ta_bordercolor_base; struct si_pm4_state *ls; struct si_pm4_state *hs; struct si_pm4_state *es; @@ -246,8 +245,6 @@ struct si_buffer_resources { } while(0) /* si_descriptors.c */ -void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, - unsigned start, unsigned 
count, void **states); void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, struct pipe_resource *buffer, unsigned stride, unsigned num_records, |