diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/r600_blit.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/r600_hw_context.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_pipe.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_pipe.h | 19 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 355 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 56 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/sid.h | 54 |
10 files changed, 535 insertions, 56 deletions
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index b3ffa727eb5..68c82820015 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -10,6 +10,7 @@ C_SOURCES := \ r600_translate.c \ radeonsi_pm4.c \ radeonsi_compute.c \ + si_descriptors.c \ si_state.c \ si_state_streamout.c \ si_state_draw.c \ diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c index bab108e7c91..bdd9bb43c10 100644 --- a/src/gallium/drivers/radeonsi/r600_blit.c +++ b/src/gallium/drivers/radeonsi/r600_blit.c @@ -70,12 +70,12 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (op & R600_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states( - rctx->blitter, rctx->ps_samplers.n_samplers, - (void**)rctx->ps_samplers.samplers); + rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers, + (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers); - util_blitter_save_fragment_sampler_views( - rctx->blitter, rctx->ps_samplers.n_views, - (struct pipe_sampler_view**)rctx->ps_samplers.views); + util_blitter_save_fragment_sampler_views(rctx->blitter, + util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask), + rctx->samplers[PIPE_SHADER_FRAGMENT].views.views); } if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) { @@ -224,7 +224,7 @@ void si_flush_depth_textures(struct r600_context *rctx, struct pipe_sampler_view *view; struct r600_texture *tex; - view = &textures->views[i]->base; + view = textures->views.views[i]; if (!view) continue; tex = (struct r600_texture *)view->texture; diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c index 25c972bbf62..bc6ba0bd1f0 100644 --- a/src/gallium/drivers/radeonsi/r600_hw_context.c +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c @@ -114,9 +114,17 @@ err: void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in) { + int i; + /* The number of dwords we already used in the CS so far. */ num_dw += ctx->cs->cdw; + for (i = 0; i < SI_NUM_ATOMS(ctx); i++) { + if (ctx->atoms.array[i]->dirty) { + num_dw += ctx->atoms.array[i]->num_dw; + } + } + if (count_draw_in) { /* The number of dwords all the dirty states would take. */ num_dw += ctx->pm4_dirty_cdwords; @@ -254,6 +262,15 @@ void si_context_flush(struct r600_context *ctx, unsigned flags) ctx->pm4_dirty_cdwords = 0; ctx->flags = 0; + /* set all valid group as dirty so they get reemited on + * next draw command + */ + si_pm4_reset_emitted(ctx); + + /* The CS initialization should be emitted before everything else. */ + si_pm4_emit(ctx, ctx->queued.named.init); + ctx->emitted.named.init = ctx->queued.named.init; + #if 0 if (streamout_suspended) { ctx->streamout_start = TRUE; @@ -266,10 +283,7 @@ void si_context_flush(struct r600_context *ctx, unsigned flags) r600_context_queries_resume(ctx); } - /* set all valid group as dirty so they get reemited on - * next draw command - */ - si_pm4_reset_emitted(ctx); + si_all_descriptors_begin_new_cs(ctx); } void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c index b4a1ca93fe6..9afc7f2714b 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c @@ -178,6 +178,8 @@ static void r600_destroy_context(struct pipe_context *context) { struct r600_context *rctx = (struct r600_context *)context; + si_release_all_descriptors(rctx); + si_resource_reference(&rctx->border_color_table, NULL); if (rctx->dummy_pixel_shader) { @@ -231,12 +233,15 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.create_video_buffer = vl_video_buffer_create; } + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL); + + si_init_all_descriptors(rctx); + switch (rctx->chip_class) { case SI: case CIK: si_init_state_functions(rctx); LIST_INITHEAD(&rctx->active_query_list); - rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL); rctx->max_db = 8; si_init_config(rctx); break; diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index 6fbe6539d87..674c6303b7a 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -94,11 +94,8 @@ struct si_cs_shader_state { struct si_pipe_compute *program; }; -/* needed for blitter save */ -#define NUM_TEX_UNITS 16 - struct r600_textures_info { - struct si_pipe_sampler_view *views[NUM_TEX_UNITS]; + struct si_sampler_views views; struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS]; unsigned n_views; uint32_t depth_texture_mask; /* which textures are depth */ @@ -131,6 +128,9 @@ struct r600_constbuf_state uint32_t dirty_mask; }; +#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0])) +#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1) + struct r600_context { struct pipe_context context; struct blitter_context *blitter; @@ -142,6 +142,14 @@ struct r600_context { void *custom_dsa_flush_inplace; struct r600_screen *screen; struct radeon_winsys *ws; + + union { + struct { + struct si_atom *sampler_views[SI_NUM_SHADERS]; + }; + struct si_atom *array[0]; + } atoms; + struct si_vertex_element *vertex_elements; struct pipe_framebuffer_state framebuffer; unsigned pa_sc_line_stipple; @@ -161,8 +169,7 @@ struct r600_context { unsigned sprite_coord_enable; unsigned export_16bpc; struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES]; - struct r600_textures_info vs_samplers; - struct r600_textures_info ps_samplers; + struct r600_textures_info samplers[SI_NUM_SHADERS]; struct si_resource *border_color_table; unsigned border_color_offset; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c new file mode 100644 index 00000000000..f05c8f490bb --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -0,0 +1,355 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Marek Olšák <[email protected]> + */ + +#include "radeonsi_pipe.h" +#include "radeonsi_resource.h" +#include "radeonsi_shader.h" +#include "r600_hw_context_priv.h" + +#include "util/u_memory.h" + +#define SI_NUM_CONTEXTS 256 + +static const uint32_t null_desc[8]; /* zeros */ + +/* Set this if you want the 3D engine to wait until CP DMA is done. + * It should be set on the last CP DMA packet. */ +#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */ + +/* Set this if the source data was used as a destination in a previous CP DMA + * packet. It's for preventing a read-after-write (RAW) hazard between two + * CP DMA packets. */ +#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */ + +/* Emit a CP DMA packet to do a copy from one buffer to another. + * The size must fit in bits [20:0]. Notes: + */ +static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx, + uint64_t dst_va, uint64_t src_va, + unsigned size, unsigned flags) +{ + struct radeon_winsys_cs *cs = rctx->cs; + uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; + uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; + + assert(size); + assert((size & ((1<<21)-1)) == size); + + if (rctx->chip_class >= CIK) { + radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); + radeon_emit(cs, sync_flag); /* CP_SYNC [31] */ + radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ + radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */ + radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ + radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */ + radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ + } else { + radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); + radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ + radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */ + radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ + radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */ + radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ + } +} + +/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */ +static void si_emit_cp_dma_clear_buffer(struct r600_context *rctx, + uint64_t dst_va, unsigned size, + uint32_t clear_value, unsigned flags) +{ + struct radeon_winsys_cs *cs = rctx->cs; + uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; + uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; + + assert(size); + assert((size & ((1<<21)-1)) == size); + + if (rctx->chip_class >= CIK) { + radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); + radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, clear_value); /* DATA [31:0] */ + radeon_emit(cs, 0); + radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ + radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [15:0] */ + radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ + } else { + radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); + radeon_emit(cs, clear_value); /* DATA [31:0] */ + radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ + radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */ + radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ + } +} + +static void si_init_descriptors(struct r600_context *rctx, + struct si_descriptors *desc, + unsigned shader_userdata_reg, + unsigned element_dw_size, + unsigned num_elements, + void (*emit_func)(struct r600_context *ctx, struct si_atom *state)) +{ + uint64_t va; + + desc->atom.emit = emit_func; + desc->shader_userdata_reg = shader_userdata_reg; + desc->element_dw_size = element_dw_size; + desc->num_elements = num_elements; + desc->context_size = num_elements * element_dw_size * 4; + + desc->buffer = (struct si_resource*) + pipe_buffer_create(rctx->context.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STATIC, + SI_NUM_CONTEXTS * desc->context_size); + + r600_context_bo_reloc(rctx, desc->buffer, RADEON_USAGE_READWRITE); + va = r600_resource_va(rctx->context.screen, &desc->buffer->b.b); + + /* We don't check for CS space here, because this should be called + * only once at context initialization. */ + si_emit_cp_dma_clear_buffer(rctx, va, desc->buffer->b.b.width0, 0, + R600_CP_DMA_SYNC); +} + +static void si_release_descriptors(struct si_descriptors *desc) +{ + pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL); +} + +static void si_update_descriptors(struct si_descriptors *desc) +{ + if (desc->dirty_mask) { + desc->atom.num_dw = + 7 + /* copy */ + (4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */ + 4; /* pointer update */ + desc->atom.dirty = true; + } else { + desc->atom.dirty = false; + } +} + +static void si_emit_shader_pointer(struct r600_context *rctx, + struct si_descriptors *desc) +{ + struct radeon_winsys_cs *cs = rctx->cs; + uint64_t va = r600_resource_va(rctx->context.screen, &desc->buffer->b.b) + + desc->current_context_id * desc->context_size; + + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0)); + radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); +} + +static void si_emit_descriptors(struct r600_context *rctx, + struct si_descriptors *desc, + const uint32_t **descriptors) +{ + struct radeon_winsys_cs *cs = rctx->cs; + uint64_t va_base; + int packet_start; + int packet_size = 0; + int last_index = desc->num_elements; /* point to a non-existing element */ + unsigned dirty_mask = desc->dirty_mask; + unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS; + + assert(dirty_mask); + + va_base = r600_resource_va(rctx->context.screen, &desc->buffer->b.b); + + /* Copy the descriptors to a new context slot. */ + si_emit_cp_dma_copy_buffer(rctx, + va_base + new_context_id * desc->context_size, + va_base + desc->current_context_id * desc->context_size, + desc->context_size, R600_CP_DMA_SYNC); + + va_base += new_context_id * desc->context_size; + + /* Update the descriptors. + * Updates of consecutive descriptors are merged to one WRITE_DATA packet. + * + * XXX When unbinding lots of resources, consider clearing the memory + * with CP DMA instead of emitting zeros. + */ + while (dirty_mask) { + int i = u_bit_scan(&dirty_mask); + + assert(i < desc->num_elements); + + if (last_index+1 == i && packet_size) { + /* Append new data at the end of the last packet. */ + packet_size += desc->element_dw_size; + cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0); + } else { + /* Start a new packet. */ + uint64_t va = va_base + i * desc->element_dw_size * 4; + + packet_start = cs->cdw; + packet_size = 2 + desc->element_dw_size; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0)); + radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | + PKT3_WRITE_DATA_WR_CONFIRM | + PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); + radeon_emit(cs, va & 0xFFFFFFFFUL); + radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL); + } + + radeon_emit_array(cs, descriptors[i], desc->element_dw_size); + + last_index = i; + } + + desc->dirty_mask = 0; + desc->current_context_id = new_context_id; + + /* Now update the shader userdata pointer. */ + si_emit_shader_pointer(rctx, desc); +} + +static unsigned si_get_shader_user_data_base(unsigned shader) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + return R_00B130_SPI_SHADER_USER_DATA_VS_0; + case PIPE_SHADER_GEOMETRY: + return R_00B230_SPI_SHADER_USER_DATA_GS_0; + case PIPE_SHADER_FRAGMENT: + return R_00B030_SPI_SHADER_USER_DATA_PS_0; + default: + assert(0); + return 0; + } +} + +/* SAMPLER VIEWS */ + +static void si_emit_sampler_views(struct r600_context *rctx, struct si_atom *atom) +{ + struct si_sampler_views *views = (struct si_sampler_views*)atom; + + si_emit_descriptors(rctx, &views->desc, views->desc_data); +} + +static void si_init_sampler_views(struct r600_context *rctx, + struct si_sampler_views *views, + unsigned shader) +{ + si_init_descriptors(rctx, &views->desc, + si_get_shader_user_data_base(shader) + + SI_SGPR_RESOURCE * 4, + 8, 16, si_emit_sampler_views); +} + +static void si_release_sampler_views(struct si_sampler_views *views) +{ + int i; + + for (i = 0; i < Elements(views->views); i++) { + pipe_sampler_view_reference(&views->views[i], NULL); + } + si_release_descriptors(&views->desc); +} + +static void si_sampler_views_begin_new_cs(struct r600_context *rctx, + struct si_sampler_views *views) +{ + unsigned mask = views->desc.enabled_mask; + + /* Add relocations to the CS. */ + while (mask) { + int i = u_bit_scan(&mask); + struct si_pipe_sampler_view *rview = + (struct si_pipe_sampler_view*)views->views[i]; + + r600_context_bo_reloc(rctx, rview->resource, RADEON_USAGE_READ); + } + + r600_context_bo_reloc(rctx, views->desc.buffer, RADEON_USAGE_READWRITE); + + si_emit_shader_pointer(rctx, &views->desc); +} + +void si_set_sampler_view(struct r600_context *rctx, unsigned shader, + unsigned slot, struct pipe_sampler_view *view, + unsigned *view_desc) +{ + struct si_sampler_views *views = &rctx->samplers[shader].views; + + if (views->views[slot] == view) + return; + + if (view) { + struct si_pipe_sampler_view *rview = + (struct si_pipe_sampler_view*)view; + + r600_context_bo_reloc(rctx, rview->resource, RADEON_USAGE_READ); + + pipe_sampler_view_reference(&views->views[slot], view); + views->desc_data[slot] = view_desc; + views->desc.enabled_mask |= 1 << slot; + } else { + pipe_sampler_view_reference(&views->views[slot], NULL); + views->desc_data[slot] = null_desc; + views->desc.enabled_mask &= ~(1 << slot); + } + + views->desc.dirty_mask |= 1 << slot; + si_update_descriptors(&views->desc); +} + +/* INIT/DEINIT */ + +void si_init_all_descriptors(struct r600_context *rctx) +{ + int i; + + for (i = 0; i < SI_NUM_SHADERS; i++) { + si_init_sampler_views(rctx, &rctx->samplers[i].views, i); + + rctx->atoms.sampler_views[i] = &rctx->samplers[i].views.desc.atom; + } +} + +void si_release_all_descriptors(struct r600_context *rctx) +{ + int i; + + for (i = 0; i < SI_NUM_SHADERS; i++) { + si_release_sampler_views(&rctx->samplers[i].views); + } +} + +void si_all_descriptors_begin_new_cs(struct r600_context *rctx) +{ + int i; + + for (i = 0; i < SI_NUM_SHADERS; i++) { + si_sampler_views_begin_new_cs(rctx, &rctx->samplers[i].views); + } +} diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d1e3c9d5279..7d637e75189 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2493,26 +2493,17 @@ static void *si_create_sampler_state(struct pipe_context *ctx, } static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, - unsigned count, - struct pipe_sampler_view **views, - struct r600_textures_info *samplers, - unsigned user_data_reg) + unsigned shader, unsigned count, + struct pipe_sampler_view **views) { - struct si_pipe_sampler_view **resource = (struct si_pipe_sampler_view **)views; + struct r600_textures_info *samplers = &rctx->samplers[shader]; + struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views; struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); - int i, j; - - if (!count) - goto out; + int i; si_pm4_inval_texture_cache(pm4); - si_pm4_sh_data_begin(pm4); for (i = 0; i < count; i++) { - pipe_sampler_view_reference( - (struct pipe_sampler_view **)&samplers->views[i], - views[i]); - if (views[i]) { struct r600_texture *rtex = (struct r600_texture*)views[i]->texture; @@ -2523,25 +2514,17 @@ static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, samplers->depth_texture_mask &= ~(1 << i); } - si_pm4_add_bo(pm4, resource[i]->resource, RADEON_USAGE_READ); + si_set_sampler_view(rctx, shader, i, views[i], rviews[i]->state); } else { samplers->depth_texture_mask &= ~(1 << i); - } - - for (j = 0; j < Elements(resource[i]->state); ++j) { - si_pm4_sh_data_add(pm4, resource[i] ? resource[i]->state[j] : 0); + si_set_sampler_view(rctx, shader, i, NULL, NULL); } } - - for (i = count; i < NUM_TEX_UNITS; i++) { - if (samplers->views[i]) - pipe_sampler_view_reference((struct pipe_sampler_view **)&samplers->views[i], NULL); + for (; i < samplers->n_views; i++) { + si_set_sampler_view(rctx, shader, i, NULL, NULL); } - si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_RESOURCE); - -out: - rctx->ps_samplers.n_views = count; + samplers->n_views = count; return pm4; } @@ -2551,8 +2534,7 @@ static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; - pm4 = si_set_sampler_views(rctx, count, views, &rctx->vs_samplers, - R_00B130_SPI_SHADER_USER_DATA_VS_0); + pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views); si_pm4_set_state(rctx, vs_sampler_views, pm4); } @@ -2562,8 +2544,7 @@ static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; - pm4 = si_set_sampler_views(rctx, count, views, &rctx->ps_samplers, - R_00B030_SPI_SHADER_USER_DATA_PS_0); + pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views); si_pm4_set_state(rctx, ps_sampler_views, pm4); } @@ -2646,7 +2627,7 @@ static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; - pm4 = si_bind_sampler_states(rctx, count, states, &rctx->vs_samplers, + pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_VERTEX], R_00B130_SPI_SHADER_USER_DATA_VS_0); si_pm4_set_state(rctx, vs_sampler, pm4); } @@ -2656,7 +2637,7 @@ static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; - pm4 = si_bind_sampler_states(rctx, count, states, &rctx->ps_samplers, + pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_FRAGMENT], R_00B030_SPI_SHADER_USER_DATA_PS_0); si_pm4_set_state(rctx, ps_sampler, pm4); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 7ce084e5794..610303bb9a5 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -29,6 +29,14 @@ #include "radeonsi_pm4.h" +/* This encapsulates a state or an operation which can emitted into the GPU + * command stream. */ +struct si_atom { + void (*emit)(struct r600_context *ctx, struct si_atom *state); + unsigned num_dw; + bool dirty; +}; + struct si_state_blend { struct si_pm4_state pm4; uint32_t cb_target_mask; @@ -103,6 +111,46 @@ union si_state { struct si_pm4_state *array[0]; }; +#define NUM_TEX_UNITS 16 + +/* This represents resource descriptors in memory, such as buffer resources, + * image resources, and sampler states. + */ +struct si_descriptors { + struct si_atom atom; + + /* The size of one resource descriptor. */ + unsigned element_dw_size; + /* The maximum number of resource descriptors. */ + unsigned num_elements; + + /* The buffer where resource descriptors are stored. */ + struct si_resource *buffer; + + /* The i-th bit is set if that element is dirty (changed but not emitted). */ + unsigned dirty_mask; + /* The i-th bit is set if that element is enabled (non-NULL resource). */ + unsigned enabled_mask; + + /* We can't update descriptors directly because the GPU might be + * reading them at the same time, so we have to update them + * in a copy-on-write manner. Each such copy is called a context, + * which is just another array descriptors in the same buffer. */ + unsigned current_context_id; + /* The size of a context, should be equal to 4*element_dw_size*num_elements. */ + unsigned context_size; + + /* The shader userdata register where the 64-bit pointer to the descriptor + * array will be stored. */ + unsigned shader_userdata_reg; +}; + +struct si_sampler_views { + struct si_descriptors desc; + struct pipe_sampler_view *views[NUM_TEX_UNITS]; + const uint32_t *desc_data[NUM_TEX_UNITS]; +}; + #define si_pm4_block_idx(member) \ (offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *)) @@ -133,6 +181,14 @@ union si_state { } \ } while(0) +/* si_descriptors.c */ +void si_set_sampler_view(struct r600_context *rctx, unsigned shader, + unsigned slot, struct pipe_sampler_view *view, + unsigned *view_desc); +void si_init_all_descriptors(struct r600_context *rctx); +void si_release_all_descriptors(struct r600_context *rctx); +void si_all_descriptors_begin_new_cs(struct r600_context *rctx); + /* si_state.c */ struct si_pipe_shader_selector; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 47e64d8634e..f03b34f4039 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -412,11 +412,10 @@ static void si_update_derived_state(struct r600_context *rctx) if (!rctx->blitter->running) { /* Flush depth textures which need to be flushed. */ - if (rctx->vs_samplers.depth_texture_mask) { - si_flush_depth_textures(rctx, &rctx->vs_samplers); - } - if (rctx->ps_samplers.depth_texture_mask) { - si_flush_depth_textures(rctx, &rctx->ps_samplers); + for (int i = 0; i < SI_NUM_SHADERS; i++) { + if (rctx->samplers[i].depth_texture_mask) { + si_flush_depth_textures(rctx, &rctx->samplers[i]); + } } } @@ -651,7 +650,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_context *rctx = (struct r600_context *)ctx; struct pipe_index_buffer ib = {}; - uint32_t cp_coher_cntl; + uint32_t cp_coher_cntl, i; if (!info->count && (info->indexed || !info->count_from_stream_output)) return; @@ -704,6 +703,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_need_cs_space(rctx, 0, TRUE); + for (i = 0; i < SI_NUM_ATOMS(rctx); i++) { + if (rctx->atoms.array[i]->dirty) { + rctx->atoms.array[i]->emit(rctx, rctx->atoms.array[i]); + rctx->atoms.array[i]->dirty = false; + } + } + si_pm4_emit_dirty(rctx); rctx->pm4_dirty_cdwords = 0; diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 208d3a88da0..57ce72e0628 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -134,6 +134,60 @@ #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) #define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate)) +#define PKT3_CP_DMA 0x41 +/* 1. header + * 2. SRC_ADDR_LO [31:0] or DATA [31:0] + * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0] + * 4. DST_ADDR_LO [31:0] + * 5. DST_ADDR_HI [15:0] + * 6. COMMAND [29:22] | BYTE_COUNT [20:0] + */ +#define PKT3_CP_DMA_CP_SYNC (1 << 31) +#define PKT3_CP_DMA_SRC_SEL(x) ((x) << 29) +/* 0 - SRC_ADDR + * 1 - GDS (program SAS to 1 as well) + * 2 - DATA + */ +#define PKT3_CP_DMA_DST_SEL(x) ((x) << 20) +/* 0 - DST_ADDR + * 1 - GDS (program DAS to 1 as well) + */ +/* COMMAND */ +#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +/* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) +/* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +#define PKT3_CP_DMA_CMD_SAS (1 << 26) +/* 0 - memory + * 1 - register + */ +#define PKT3_CP_DMA_CMD_DAS (1 << 27) +/* 0 - memory + * 1 - register + */ +#define PKT3_CP_DMA_CMD_SAIC (1 << 28) +#define PKT3_CP_DMA_CMD_DAIC (1 << 29) +#define PKT3_CP_DMA_CMD_RAW_WAIT (1 << 30) + +#define PKT3_DMA_DATA 0x50 /* new for CIK */ +/* 1. header + * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0] + * 2. SRC_ADDR_LO [31:0] or DATA [31:0] + * 3. SRC_ADDR_HI [31:0] + * 4. DST_ADDR_LO [31:0] + * 5. DST_ADDR_HI [31:0] + * 6. COMMAND [29:22] | BYTE_COUNT [20:0] + */ + + #define R_0084FC_CP_STRMOUT_CNTL 0x0084FC #define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0) #define R_0085F0_CP_COHER_CNTL 0x0085F0 |