summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeonsi/Makefile.sources1
-rw-r--r--src/gallium/drivers/radeonsi/r600_blit.c12
-rw-r--r--src/gallium/drivers/radeonsi/r600_hw_context.c22
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_pipe.c7
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_pipe.h19
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c355
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c47
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h56
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c18
-rw-r--r--src/gallium/drivers/radeonsi/sid.h54
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_winsys.h12
11 files changed, 547 insertions, 56 deletions
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index b3ffa727eb5..68c82820015 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -10,6 +10,7 @@ C_SOURCES := \
r600_translate.c \
radeonsi_pm4.c \
radeonsi_compute.c \
+ si_descriptors.c \
si_state.c \
si_state_streamout.c \
si_state_draw.c \
diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c
index bab108e7c91..bdd9bb43c10 100644
--- a/src/gallium/drivers/radeonsi/r600_blit.c
+++ b/src/gallium/drivers/radeonsi/r600_blit.c
@@ -70,12 +70,12 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
if (op & R600_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(
- rctx->blitter, rctx->ps_samplers.n_samplers,
- (void**)rctx->ps_samplers.samplers);
+ rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
+ (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers);
- util_blitter_save_fragment_sampler_views(
- rctx->blitter, rctx->ps_samplers.n_views,
- (struct pipe_sampler_view**)rctx->ps_samplers.views);
+ util_blitter_save_fragment_sampler_views(rctx->blitter,
+ util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask),
+ rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
}
if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
@@ -224,7 +224,7 @@ void si_flush_depth_textures(struct r600_context *rctx,
struct pipe_sampler_view *view;
struct r600_texture *tex;
- view = &textures->views[i]->base;
+ view = textures->views.views[i];
if (!view) continue;
tex = (struct r600_texture *)view->texture;
diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
index 25c972bbf62..bc6ba0bd1f0 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context.c
+++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
@@ -114,9 +114,17 @@ err:
void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in)
{
+ int i;
+
/* The number of dwords we already used in the CS so far. */
num_dw += ctx->cs->cdw;
+ for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
+ if (ctx->atoms.array[i]->dirty) {
+ num_dw += ctx->atoms.array[i]->num_dw;
+ }
+ }
+
if (count_draw_in) {
/* The number of dwords all the dirty states would take. */
num_dw += ctx->pm4_dirty_cdwords;
@@ -254,6 +262,15 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
ctx->pm4_dirty_cdwords = 0;
ctx->flags = 0;
+ /* set all valid group as dirty so they get reemited on
+ * next draw command
+ */
+ si_pm4_reset_emitted(ctx);
+
+ /* The CS initialization should be emitted before everything else. */
+ si_pm4_emit(ctx, ctx->queued.named.init);
+ ctx->emitted.named.init = ctx->queued.named.init;
+
#if 0
if (streamout_suspended) {
ctx->streamout_start = TRUE;
@@ -266,10 +283,7 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
r600_context_queries_resume(ctx);
}
- /* set all valid group as dirty so they get reemited on
- * next draw command
- */
- si_pm4_reset_emitted(ctx);
+ si_all_descriptors_begin_new_cs(ctx);
}
void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index b4a1ca93fe6..9afc7f2714b 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -178,6 +178,8 @@ static void r600_destroy_context(struct pipe_context *context)
{
struct r600_context *rctx = (struct r600_context *)context;
+ si_release_all_descriptors(rctx);
+
si_resource_reference(&rctx->border_color_table, NULL);
if (rctx->dummy_pixel_shader) {
@@ -231,12 +233,15 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
rctx->context.create_video_buffer = vl_video_buffer_create;
}
+ rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
+
+ si_init_all_descriptors(rctx);
+
switch (rctx->chip_class) {
case SI:
case CIK:
si_init_state_functions(rctx);
LIST_INITHEAD(&rctx->active_query_list);
- rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
rctx->max_db = 8;
si_init_config(rctx);
break;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 6fbe6539d87..674c6303b7a 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -94,11 +94,8 @@ struct si_cs_shader_state {
struct si_pipe_compute *program;
};
-/* needed for blitter save */
-#define NUM_TEX_UNITS 16
-
struct r600_textures_info {
- struct si_pipe_sampler_view *views[NUM_TEX_UNITS];
+ struct si_sampler_views views;
struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS];
unsigned n_views;
uint32_t depth_texture_mask; /* which textures are depth */
@@ -131,6 +128,9 @@ struct r600_constbuf_state
uint32_t dirty_mask;
};
+#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0]))
+#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1)
+
struct r600_context {
struct pipe_context context;
struct blitter_context *blitter;
@@ -142,6 +142,14 @@ struct r600_context {
void *custom_dsa_flush_inplace;
struct r600_screen *screen;
struct radeon_winsys *ws;
+
+ union {
+ struct {
+ struct si_atom *sampler_views[SI_NUM_SHADERS];
+ };
+ struct si_atom *array[0];
+ } atoms;
+
struct si_vertex_element *vertex_elements;
struct pipe_framebuffer_state framebuffer;
unsigned pa_sc_line_stipple;
@@ -161,8 +169,7 @@ struct r600_context {
unsigned sprite_coord_enable;
unsigned export_16bpc;
struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
- struct r600_textures_info vs_samplers;
- struct r600_textures_info ps_samplers;
+ struct r600_textures_info samplers[SI_NUM_SHADERS];
struct si_resource *border_color_table;
unsigned border_color_offset;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
new file mode 100644
index 00000000000..f05c8f490bb
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Marek Olšák <[email protected]>
+ */
+
+#include "radeonsi_pipe.h"
+#include "radeonsi_resource.h"
+#include "radeonsi_shader.h"
+#include "r600_hw_context_priv.h"
+
+#include "util/u_memory.h"
+
+#define SI_NUM_CONTEXTS 256
+
+static const uint32_t null_desc[8]; /* zeros */
+
+/* Set this if you want the 3D engine to wait until CP DMA is done.
+ * It should be set on the last CP DMA packet. */
+#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */
+
+/* Set this if the source data was used as a destination in a previous CP DMA
+ * packet. It's for preventing a read-after-write (RAW) hazard between two
+ * CP DMA packets. */
+#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
+
+/* Emit a CP DMA packet to do a copy from one buffer to another.
+ * The size must be non-zero and fit in bits [20:0] (both are asserted).
+ */
+static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
+ uint64_t dst_va, uint64_t src_va,
+ unsigned size, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+
+ assert(size);
+ assert((size & ((1<<21)-1)) == size);
+
+ if (rctx->chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, sync_flag); /* CP_SYNC [31] */
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ }
+}
+
+/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
+static void si_emit_cp_dma_clear_buffer(struct r600_context *rctx,
+ uint64_t dst_va, unsigned size,
+ uint32_t clear_value, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+
+ assert(size);
+ assert((size & ((1<<21)-1)) == size);
+
+ if (rctx->chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, clear_value); /* DATA [31:0] */
+ radeon_emit(cs, 0); /* SRC_ADDR_HI position; written as zero for a DATA source */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ radeon_emit(cs, clear_value); /* DATA [31:0] */
+ radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ }
+}
+
+static void si_init_descriptors(struct r600_context *rctx,
+ struct si_descriptors *desc,
+ unsigned shader_userdata_reg,
+ unsigned element_dw_size,
+ unsigned num_elements,
+ void (*emit_func)(struct r600_context *ctx, struct si_atom *state))
+{
+ uint64_t va;
+
+ desc->atom.emit = emit_func;
+ desc->shader_userdata_reg = shader_userdata_reg;
+ desc->element_dw_size = element_dw_size;
+ desc->num_elements = num_elements;
+ desc->context_size = num_elements * element_dw_size * 4;
+
+ desc->buffer = (struct si_resource*)
+ pipe_buffer_create(rctx->context.screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_STATIC,
+ SI_NUM_CONTEXTS * desc->context_size);
+
+ r600_context_bo_reloc(rctx, desc->buffer, RADEON_USAGE_READWRITE);
+ va = r600_resource_va(rctx->context.screen, &desc->buffer->b.b);
+
+ /* We don't check for CS space here, because this should be called
+ * only once at context initialization. */
+ si_emit_cp_dma_clear_buffer(rctx, va, desc->buffer->b.b.width0, 0,
+ R600_CP_DMA_SYNC);
+}
+
+static void si_release_descriptors(struct si_descriptors *desc)
+{
+ pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL);
+}
+
+static void si_update_descriptors(struct si_descriptors *desc)
+{
+ if (desc->dirty_mask) {
+ desc->atom.num_dw =
+ 7 + /* copy */
+ (4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */
+ 4; /* pointer update */
+ desc->atom.dirty = true;
+ } else {
+ desc->atom.dirty = false;
+ }
+}
+
+static void si_emit_shader_pointer(struct r600_context *rctx,
+ struct si_descriptors *desc)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ uint64_t va = r600_resource_va(rctx->context.screen, &desc->buffer->b.b) +
+ desc->current_context_id * desc->context_size;
+
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
+ radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+}
+
+static void si_emit_descriptors(struct r600_context *rctx,
+ struct si_descriptors *desc,
+ const uint32_t **descriptors)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ uint64_t va_base;
+ int packet_start;
+ int packet_size = 0;
+ int last_index = desc->num_elements; /* point to a non-existing element */
+ unsigned dirty_mask = desc->dirty_mask;
+ unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS;
+
+ assert(dirty_mask);
+
+ va_base = r600_resource_va(rctx->context.screen, &desc->buffer->b.b);
+
+ /* Copy the descriptors to a new context slot. */
+ si_emit_cp_dma_copy_buffer(rctx,
+ va_base + new_context_id * desc->context_size,
+ va_base + desc->current_context_id * desc->context_size,
+ desc->context_size, R600_CP_DMA_SYNC);
+
+ va_base += new_context_id * desc->context_size;
+
+ /* Update the descriptors.
+ * Updates of consecutive descriptors are merged to one WRITE_DATA packet.
+ *
+ * XXX When unbinding lots of resources, consider clearing the memory
+ * with CP DMA instead of emitting zeros.
+ */
+ while (dirty_mask) {
+ int i = u_bit_scan(&dirty_mask);
+
+ assert(i < desc->num_elements);
+
+ if (last_index+1 == i && packet_size) {
+ /* Append new data at the end of the last packet. */
+ packet_size += desc->element_dw_size;
+ cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0);
+ } else {
+ /* Start a new packet. */
+ uint64_t va = va_base + i * desc->element_dw_size * 4;
+
+ packet_start = cs->cdw;
+ packet_size = 2 + desc->element_dw_size;
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0));
+ radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
+ PKT3_WRITE_DATA_WR_CONFIRM |
+ PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
+ radeon_emit(cs, va & 0xFFFFFFFFUL);
+ radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
+ }
+
+ radeon_emit_array(cs, descriptors[i], desc->element_dw_size);
+
+ last_index = i;
+ }
+
+ desc->dirty_mask = 0;
+ desc->current_context_id = new_context_id;
+
+ /* Now update the shader userdata pointer. */
+ si_emit_shader_pointer(rctx, desc);
+}
+
+static unsigned si_get_shader_user_data_base(unsigned shader)
+{
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ case PIPE_SHADER_GEOMETRY:
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ case PIPE_SHADER_FRAGMENT:
+ return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+/* SAMPLER VIEWS */
+
+static void si_emit_sampler_views(struct r600_context *rctx, struct si_atom *atom)
+{
+ struct si_sampler_views *views = (struct si_sampler_views*)atom;
+
+ si_emit_descriptors(rctx, &views->desc, views->desc_data);
+}
+
+static void si_init_sampler_views(struct r600_context *rctx,
+ struct si_sampler_views *views,
+ unsigned shader)
+{
+ si_init_descriptors(rctx, &views->desc,
+ si_get_shader_user_data_base(shader) +
+ SI_SGPR_RESOURCE * 4,
+ 8, 16, si_emit_sampler_views);
+}
+
+static void si_release_sampler_views(struct si_sampler_views *views)
+{
+ int i;
+
+ for (i = 0; i < Elements(views->views); i++) {
+ pipe_sampler_view_reference(&views->views[i], NULL);
+ }
+ si_release_descriptors(&views->desc);
+}
+
+static void si_sampler_views_begin_new_cs(struct r600_context *rctx,
+ struct si_sampler_views *views)
+{
+ unsigned mask = views->desc.enabled_mask;
+
+ /* Add relocations to the CS. */
+ while (mask) {
+ int i = u_bit_scan(&mask);
+ struct si_pipe_sampler_view *rview =
+ (struct si_pipe_sampler_view*)views->views[i];
+
+ r600_context_bo_reloc(rctx, rview->resource, RADEON_USAGE_READ);
+ }
+
+ r600_context_bo_reloc(rctx, views->desc.buffer, RADEON_USAGE_READWRITE);
+
+ si_emit_shader_pointer(rctx, &views->desc);
+}
+
+void si_set_sampler_view(struct r600_context *rctx, unsigned shader,
+ unsigned slot, struct pipe_sampler_view *view,
+ unsigned *view_desc)
+{
+ struct si_sampler_views *views = &rctx->samplers[shader].views;
+
+ if (views->views[slot] == view)
+ return;
+
+ if (view) {
+ struct si_pipe_sampler_view *rview =
+ (struct si_pipe_sampler_view*)view;
+
+ r600_context_bo_reloc(rctx, rview->resource, RADEON_USAGE_READ);
+
+ pipe_sampler_view_reference(&views->views[slot], view);
+ views->desc_data[slot] = view_desc;
+ views->desc.enabled_mask |= 1 << slot;
+ } else {
+ pipe_sampler_view_reference(&views->views[slot], NULL);
+ views->desc_data[slot] = null_desc;
+ views->desc.enabled_mask &= ~(1 << slot);
+ }
+
+ views->desc.dirty_mask |= 1 << slot;
+ si_update_descriptors(&views->desc);
+}
+
+/* INIT/DEINIT */
+
+void si_init_all_descriptors(struct r600_context *rctx)
+{
+ int i;
+
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ si_init_sampler_views(rctx, &rctx->samplers[i].views, i);
+
+ rctx->atoms.sampler_views[i] = &rctx->samplers[i].views.desc.atom;
+ }
+}
+
+void si_release_all_descriptors(struct r600_context *rctx)
+{
+ int i;
+
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ si_release_sampler_views(&rctx->samplers[i].views);
+ }
+}
+
+void si_all_descriptors_begin_new_cs(struct r600_context *rctx)
+{
+ int i;
+
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ si_sampler_views_begin_new_cs(rctx, &rctx->samplers[i].views);
+ }
+}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index d1e3c9d5279..7d637e75189 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2493,26 +2493,17 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
}
static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx,
- unsigned count,
- struct pipe_sampler_view **views,
- struct r600_textures_info *samplers,
- unsigned user_data_reg)
+ unsigned shader, unsigned count,
+ struct pipe_sampler_view **views)
{
- struct si_pipe_sampler_view **resource = (struct si_pipe_sampler_view **)views;
+ struct r600_textures_info *samplers = &rctx->samplers[shader];
+ struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views;
struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
- int i, j;
-
- if (!count)
- goto out;
+ int i;
si_pm4_inval_texture_cache(pm4);
- si_pm4_sh_data_begin(pm4);
for (i = 0; i < count; i++) {
- pipe_sampler_view_reference(
- (struct pipe_sampler_view **)&samplers->views[i],
- views[i]);
-
if (views[i]) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
@@ -2523,25 +2514,17 @@ static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx,
samplers->depth_texture_mask &= ~(1 << i);
}
- si_pm4_add_bo(pm4, resource[i]->resource, RADEON_USAGE_READ);
+ si_set_sampler_view(rctx, shader, i, views[i], rviews[i]->state);
} else {
samplers->depth_texture_mask &= ~(1 << i);
- }
-
- for (j = 0; j < Elements(resource[i]->state); ++j) {
- si_pm4_sh_data_add(pm4, resource[i] ? resource[i]->state[j] : 0);
+ si_set_sampler_view(rctx, shader, i, NULL, NULL);
}
}
-
- for (i = count; i < NUM_TEX_UNITS; i++) {
- if (samplers->views[i])
- pipe_sampler_view_reference((struct pipe_sampler_view **)&samplers->views[i], NULL);
+ for (; i < samplers->n_views; i++) {
+ si_set_sampler_view(rctx, shader, i, NULL, NULL);
}
- si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_RESOURCE);
-
-out:
- rctx->ps_samplers.n_views = count;
+ samplers->n_views = count;
return pm4;
}
@@ -2551,8 +2534,7 @@ static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count,
struct r600_context *rctx = (struct r600_context *)ctx;
struct si_pm4_state *pm4;
- pm4 = si_set_sampler_views(rctx, count, views, &rctx->vs_samplers,
- R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views);
si_pm4_set_state(rctx, vs_sampler_views, pm4);
}
@@ -2562,8 +2544,7 @@ static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count,
struct r600_context *rctx = (struct r600_context *)ctx;
struct si_pm4_state *pm4;
- pm4 = si_set_sampler_views(rctx, count, views, &rctx->ps_samplers,
- R_00B030_SPI_SHADER_USER_DATA_PS_0);
+ pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views);
si_pm4_set_state(rctx, ps_sampler_views, pm4);
}
@@ -2646,7 +2627,7 @@ static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count,
struct r600_context *rctx = (struct r600_context *)ctx;
struct si_pm4_state *pm4;
- pm4 = si_bind_sampler_states(rctx, count, states, &rctx->vs_samplers,
+ pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_VERTEX],
R_00B130_SPI_SHADER_USER_DATA_VS_0);
si_pm4_set_state(rctx, vs_sampler, pm4);
}
@@ -2656,7 +2637,7 @@ static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count,
struct r600_context *rctx = (struct r600_context *)ctx;
struct si_pm4_state *pm4;
- pm4 = si_bind_sampler_states(rctx, count, states, &rctx->ps_samplers,
+ pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_FRAGMENT],
R_00B030_SPI_SHADER_USER_DATA_PS_0);
si_pm4_set_state(rctx, ps_sampler, pm4);
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 7ce084e5794..610303bb9a5 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -29,6 +29,14 @@
#include "radeonsi_pm4.h"
+/* This encapsulates a state or an operation which can be emitted into the GPU
+ * command stream. */
+struct si_atom {
+ void (*emit)(struct r600_context *ctx, struct si_atom *state);
+ unsigned num_dw;
+ bool dirty;
+};
+
struct si_state_blend {
struct si_pm4_state pm4;
uint32_t cb_target_mask;
@@ -103,6 +111,46 @@ union si_state {
struct si_pm4_state *array[0];
};
+#define NUM_TEX_UNITS 16
+
+/* This represents resource descriptors in memory, such as buffer resources,
+ * image resources, and sampler states.
+ */
+struct si_descriptors {
+ struct si_atom atom;
+
+ /* The size of one resource descriptor. */
+ unsigned element_dw_size;
+ /* The maximum number of resource descriptors. */
+ unsigned num_elements;
+
+ /* The buffer where resource descriptors are stored. */
+ struct si_resource *buffer;
+
+ /* The i-th bit is set if that element is dirty (changed but not emitted). */
+ unsigned dirty_mask;
+ /* The i-th bit is set if that element is enabled (non-NULL resource). */
+ unsigned enabled_mask;
+
+ /* We can't update descriptors directly because the GPU might be
+ * reading them at the same time, so we have to update them
+ * in a copy-on-write manner. Each such copy is called a context,
+ * which is just another array of descriptors in the same buffer. */
+ unsigned current_context_id;
+ /* The size of a context, should be equal to 4*element_dw_size*num_elements. */
+ unsigned context_size;
+
+ /* The shader userdata register where the 64-bit pointer to the descriptor
+ * array will be stored. */
+ unsigned shader_userdata_reg;
+};
+
+struct si_sampler_views {
+ struct si_descriptors desc;
+ struct pipe_sampler_view *views[NUM_TEX_UNITS];
+ const uint32_t *desc_data[NUM_TEX_UNITS];
+};
+
#define si_pm4_block_idx(member) \
(offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
@@ -133,6 +181,14 @@ union si_state {
} \
} while(0)
+/* si_descriptors.c */
+void si_set_sampler_view(struct r600_context *rctx, unsigned shader,
+ unsigned slot, struct pipe_sampler_view *view,
+ unsigned *view_desc);
+void si_init_all_descriptors(struct r600_context *rctx);
+void si_release_all_descriptors(struct r600_context *rctx);
+void si_all_descriptors_begin_new_cs(struct r600_context *rctx);
+
/* si_state.c */
struct si_pipe_shader_selector;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 47e64d8634e..f03b34f4039 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -412,11 +412,10 @@ static void si_update_derived_state(struct r600_context *rctx)
if (!rctx->blitter->running) {
/* Flush depth textures which need to be flushed. */
- if (rctx->vs_samplers.depth_texture_mask) {
- si_flush_depth_textures(rctx, &rctx->vs_samplers);
- }
- if (rctx->ps_samplers.depth_texture_mask) {
- si_flush_depth_textures(rctx, &rctx->ps_samplers);
+ for (int i = 0; i < SI_NUM_SHADERS; i++) {
+ if (rctx->samplers[i].depth_texture_mask) {
+ si_flush_depth_textures(rctx, &rctx->samplers[i]);
+ }
}
}
@@ -651,7 +650,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct pipe_index_buffer ib = {};
- uint32_t cp_coher_cntl;
+ uint32_t cp_coher_cntl, i;
if (!info->count && (info->indexed || !info->count_from_stream_output))
return;
@@ -704,6 +703,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
si_need_cs_space(rctx, 0, TRUE);
+ for (i = 0; i < SI_NUM_ATOMS(rctx); i++) {
+ if (rctx->atoms.array[i]->dirty) {
+ rctx->atoms.array[i]->emit(rctx, rctx->atoms.array[i]);
+ rctx->atoms.array[i]->dirty = false;
+ }
+ }
+
si_pm4_emit_dirty(rctx);
rctx->pm4_dirty_cdwords = 0;
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 208d3a88da0..57ce72e0628 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -134,6 +134,60 @@
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))
+#define PKT3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [15:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+#define PKT3_CP_DMA_CP_SYNC (1 << 31)
+#define PKT3_CP_DMA_SRC_SEL(x) ((x) << 29)
+/* 0 - SRC_ADDR
+ * 1 - GDS (program SAS to 1 as well)
+ * 2 - DATA
+ */
+#define PKT3_CP_DMA_DST_SEL(x) ((x) << 20)
+/* 0 - DST_ADDR
+ * 1 - GDS (program DAS to 1 as well)
+ */
+/* COMMAND */
+#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+/* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+/* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+#define PKT3_CP_DMA_CMD_SAS (1 << 26)
+/* 0 - memory
+ * 1 - register
+ */
+#define PKT3_CP_DMA_CMD_DAS (1 << 27)
+/* 0 - memory
+ * 1 - register
+ */
+#define PKT3_CP_DMA_CMD_SAIC (1 << 28)
+#define PKT3_CP_DMA_CMD_DAIC (1 << 29)
+#define PKT3_CP_DMA_CMD_RAW_WAIT (1 << 30)
+
+#define PKT3_DMA_DATA 0x50 /* new for CIK */
+/* 1. header
+ * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
+ * 3. SRC_ADDR_LO [31:0] or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [31:0]
+ * 7. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+
+
#define R_0084FC_CP_STRMOUT_CNTL 0x0084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_0085F0_CP_COHER_CNTL 0x0085F0
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index a619d709754..9c6589a7a96 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -501,4 +501,16 @@ struct radeon_winsys {
enum radeon_value_id value);
};
+static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+{
+ cs->buf[cs->cdw++] = value;
+}
+
+static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
+ const uint32_t *values, unsigned count)
+{
+ memcpy(cs->buf+cs->cdw, values, count * 4);
+ cs->cdw += count;
+}
+
#endif