aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
author Dave Airlie <[email protected]> 2015-09-11 04:43:53 +0100
committer Dave Airlie <[email protected]> 2015-09-12 06:56:58 +0100
commit f9caabe8f1bff86d19b53d9ecba5c72b238d9e23 (patch)
tree ee2dd463bfbe042f19068d8d04ad8e8f7adf7907 /src/gallium/drivers/r600
parent 0337a9b2af6cb72eb2ca3ee2e17d5e06ea7aeacd (diff)
r600g: lower number of driver const buffers
I'm going to want a driver constant buffer for tess to coordinate LDS storage, so before tackling that I decided to merge the clip/samplepos and texture info buffers into one, so that I can steal the spare one. This creates a single constant buffer shared by the two, with clip/samplepos taking up a reserved 128 bytes at the start.
Reviewed-by: Edward O'Callaghan <[email protected]>
Reviewed-by: Glenn Kennard <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h | 28
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c | 21
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c | 152
3 files changed, 131 insertions, 70 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 25df831339c..d0774de8573 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -63,13 +63,15 @@
#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
-#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
+#define R600_UCP_SIZE (4*4*8)
+#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
+
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
* of 16 const buffers.
* UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -77,8 +79,6 @@
* In order to support d3d 11 mandated minimum of 15 user const buffers
* we'd have to squash all use cases into one driver buffer.
*/
-#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-
#define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
#ifdef PIPE_ARCH_BIG_ENDIAN
@@ -356,11 +356,15 @@ struct r600_textures_info {
struct r600_samplerview_state views;
struct r600_sampler_states states;
bool is_array_sampler[NUM_TEX_UNITS];
+};
- /* cube array txq workaround */
- uint32_t *txq_constants;
- /* buffer related workarounds */
- uint32_t *buffer_constants;
+/* Per-shader-stage bookkeeping for the merged driver constant buffer that is
+ * bound at R600_BUFFER_INFO_CONST_BUFFER: a CPU-side staging allocation plus
+ * flags recording which part of its contents changed since the last upload. */
+struct r600_shader_driver_constants_info {
+ /* currently 128 bytes for UCP/samplepos + sampler buffer constants */
+ uint32_t *constants;
+ /* size in bytes of the allocation behind constants; 0 is treated as
+  * "no staging buffer yet" by r600_update_driver_const_buffers() */
+ uint32_t alloc_size;
+ /* set by r600_set_clip_state() on the vertex stage when the user clip
+  * planes change */
+ bool vs_ucp_dirty;
+ /* set by r600_alloc_buf_consts() when the sampler buffer constants that
+  * follow the reserved 128 bytes are rebuilt */
+ bool texture_const_dirty;
+ /* set by r600_set_sample_locations_constant_buffer() on the fragment
+  * stage when the sample positions are recomputed */
+ bool ps_sample_pos_dirty;
};
struct r600_constbuf_state
@@ -472,6 +476,9 @@ struct r600_context {
struct r600_gs_rings_state gs_rings;
struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
struct r600_textures_info samplers[PIPE_SHADER_TYPES];
+
+ struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];
+
/** Vertex buffers for fetch shaders */
struct r600_vertexbuf_state vertex_buffer_state;
/** Vertex buffers for compute shaders */
@@ -498,6 +505,7 @@ struct r600_context {
void *sb_context;
struct r600_isa *isa;
+ float sample_positions[4 * 16];
};
static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f2c9e169f74..93b1bf7d5b4 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -60,6 +60,7 @@ issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
union r600_shader_key key);
@@ -947,7 +948,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
vtx.op = FETCH_OP_VFETCH;
- vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+ vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
if (sample_id == NULL) {
vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
@@ -2307,7 +2308,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
alu.src[0].chan = j;
alu.src[1].sel = 512 + i;
- alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
+ alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
alu.src[1].chan = j;
alu.dst.sel = clipdist_temp[oreg];
@@ -5499,7 +5500,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
alu.src[0].sel = vtx.dst_gpr;
alu.src[0].chan = i;
- alu.src[1].sel = 512 + (id * 2);
+ alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
+ alu.src[1].sel += (id * 2);
alu.src[1].chan = i % 4;
alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -5521,7 +5523,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
alu.src[0].sel = vtx.dst_gpr;
alu.src[0].chan = 3;
- alu.src[1].sel = 512 + (id * 2) + 1;
+ alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
alu.src[1].chan = 0;
alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -5542,14 +5544,14 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
-
+ alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
/* channel 0 or 2 of each word */
- alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].sel += (id / 2);
alu.src[0].chan = (id % 2) * 2;
} else {
/* r600 we have them at channel 2 of the second dword */
- alu.src[0].sel = 512 + (id * 2) + 1;
+ alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 1;
}
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -6207,13 +6209,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
/* channel 1 or 3 of each word */
- alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].sel += (id / 2);
alu.src[0].chan = ((id % 2) * 2) + 1;
} else {
/* r600 we have them at channel 2 of the second dword */
- alu.src[0].sel = 512 + (id * 2) + 1;
+ alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 2;
}
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index ae1341187cb..21c89dc0b61 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -240,17 +240,10 @@ static void r600_set_clip_state(struct pipe_context *ctx,
const struct pipe_clip_state *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct pipe_constant_buffer cb;
rctx->clip_state.state = *state;
r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
-
- cb.buffer = NULL;
- cb.user_buffer = state->ucp;
- cb.buffer_offset = 0;
- cb.buffer_size = 4*4*8;
- ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb);
- pipe_resource_reference(&cb.buffer, NULL);
+ rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
}
static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -1053,6 +1046,74 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
}
+/*
+ * Re-bind the merged driver constant buffer (slot
+ * R600_BUFFER_INFO_CONST_BUFFER) for every shader stage whose driver
+ * constants are flagged dirty.
+ *
+ * The first R600_UCP_SIZE bytes of the buffer are reserved: the vertex stage
+ * keeps the user clip planes there, the fragment stage the sample positions.
+ * The sampler buffer constants written via r600_alloc_buf_consts() follow.
+ *
+ * A stage that never allocated a staging buffer (alloc_size == 0) has no
+ * sampler constants, so the reserved data is bound directly from the context
+ * instead of being copied first.
+ *
+ * All dirty flags of a processed stage are cleared.
+ */
+static void r600_update_driver_const_buffers(struct r600_context *rctx)
+{
+	int sh;
+
+	for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+		struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
+		struct pipe_constant_buffer cb;
+		void *ptr;
+		int size;
+
+		if (!info->vs_ucp_dirty &&
+		    !info->texture_const_dirty &&
+		    !info->ps_sample_pos_dirty)
+			continue;
+
+		ptr = info->constants;
+		size = info->alloc_size;
+
+		if (info->vs_ucp_dirty) {
+			assert(sh == PIPE_SHADER_VERTEX);
+			if (!size) {
+				/* no staging buffer: bind the clip planes in place */
+				ptr = rctx->clip_state.state.ucp;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			}
+			info->vs_ucp_dirty = false;
+		}
+
+		if (info->ps_sample_pos_dirty) {
+			assert(sh == PIPE_SHADER_FRAGMENT);
+			if (!size) {
+				/* no staging buffer: bind the sample positions in place */
+				ptr = rctx->sample_positions;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+			}
+			info->ps_sample_pos_dirty = false;
+		}
+
+		if (info->texture_const_dirty) {
+			/* r600_alloc_buf_consts() only clears the sampler part of
+			 * the staging buffer, so the reserved area is refreshed
+			 * here even when it is not itself flagged dirty. */
+			assert(ptr);
+			assert(size);
+			if (sh == PIPE_SHADER_VERTEX)
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			if (sh == PIPE_SHADER_FRAGMENT)
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+			info->texture_const_dirty = false;
+		}
+
+		cb.buffer = NULL;
+		cb.user_buffer = ptr;
+		cb.buffer_offset = 0;
+		cb.buffer_size = size;
+		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+		/* NOTE(review): presumably set_constant_buffer may leave a
+		 * resource reference in cb.buffer; release it either way. */
+		pipe_resource_reference(&cb.buffer, NULL);
+	}
+}
+
+/*
+ * Make sure the staging buffer of one shader stage can hold the reserved
+ * R600_UCP_SIZE bytes followed by array_size bytes of sampler buffer
+ * constants, zero the sampler part and flag it for upload.
+ *
+ * rctx:        context owning the per-stage driver constant state
+ * shader_type: index into rctx->driver_consts
+ * array_size:  size in bytes of the sampler constant area to provide
+ * base_offset: receives the byte offset of the sampler constant area from
+ *              the start of the returned buffer (always R600_UCP_SIZE)
+ *
+ * Returns the start of the staging buffer, which stays owned by the context,
+ * or NULL if it could not be grown. On failure the previous buffer and its
+ * recorded size are left untouched and nothing is flagged dirty; callers
+ * must check for NULL before writing constants.
+ */
+static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
+				   int array_size, uint32_t *base_offset)
+{
+	struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
+	uint32_t needed = array_size + R600_UCP_SIZE;
+
+	*base_offset = R600_UCP_SIZE;
+
+	if (needed > info->alloc_size) {
+		/* grow through a temporary so a failed realloc neither leaks
+		 * the old buffer nor leaves alloc_size describing memory
+		 * that does not exist */
+		uint32_t *grown = realloc(info->constants, needed);
+		if (!grown)
+			return NULL;
+		info->constants = grown;
+		info->alloc_size = needed;
+	}
+
+	/* only the sampler part is cleared; the reserved area is filled in by
+	 * r600_update_driver_const_buffers() */
+	memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size);
+	info->texture_const_dirty = true;
+	return info->constants;
+}
/*
* On r600/700 hw we don't have vertex fetch swizzle, though TBO
* doesn't require full swizzles it does need masking and setting alpha
@@ -1067,9 +1128,9 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
struct r600_textures_info *samplers = &rctx->samplers[shader_type];
int bits;
uint32_t array_size;
- struct pipe_constant_buffer cb;
int i, j;
-
+ uint32_t *constants;
+ uint32_t base_offset;
if (!samplers->views.dirty_buffer_constants)
return;
@@ -1077,38 +1138,33 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
bits = util_last_bit(samplers->views.enabled_mask);
array_size = bits * 8 * sizeof(uint32_t) * 4;
- samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
- memset(samplers->buffer_constants, 0, array_size);
+
+ constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
+
for (i = 0; i < bits; i++) {
if (samplers->views.enabled_mask & (1 << i)) {
- int offset = i * 8;
+ int offset = (base_offset / 4) + i * 8;
const struct util_format_description *desc;
desc = util_format_description(samplers->views.views[i]->base.format);
for (j = 0; j < 4; j++)
if (j < desc->nr_channels)
- samplers->buffer_constants[offset+j] = 0xffffffff;
+ constants[offset+j] = 0xffffffff;
else
- samplers->buffer_constants[offset+j] = 0x0;
+ constants[offset+j] = 0x0;
if (desc->nr_channels < 4) {
if (desc->channel[0].pure_integer)
- samplers->buffer_constants[offset+4] = 1;
+ constants[offset+4] = 1;
else
- samplers->buffer_constants[offset+4] = fui(1.0);
+ constants[offset+4] = fui(1.0);
} else
- samplers->buffer_constants[offset + 4] = 0;
+ constants[offset + 4] = 0;
- samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
- samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
+ constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+ constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
}
}
- cb.buffer = NULL;
- cb.user_buffer = samplers->buffer_constants;
- cb.buffer_offset = 0;
- cb.buffer_size = array_size;
- rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
- pipe_resource_reference(&cb.buffer, NULL);
}
/* On evergreen we store two values
@@ -1120,9 +1176,9 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
struct r600_textures_info *samplers = &rctx->samplers[shader_type];
int bits;
uint32_t array_size;
- struct pipe_constant_buffer cb;
int i;
-
+ uint32_t *constants;
+ uint32_t base_offset;
if (!samplers->views.dirty_buffer_constants)
return;
@@ -1130,45 +1186,37 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
bits = util_last_bit(samplers->views.enabled_mask);
array_size = bits * 2 * sizeof(uint32_t) * 4;
- samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
- memset(samplers->buffer_constants, 0, array_size);
+
+ constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
+ &base_offset);
+
for (i = 0; i < bits; i++) {
if (samplers->views.enabled_mask & (1 << i)) {
- uint32_t offset = i * 2;
- samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
- samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+ uint32_t offset = (base_offset / 4) + i * 2;
+ constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+ constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
}
}
-
- cb.buffer = NULL;
- cb.user_buffer = samplers->buffer_constants;
- cb.buffer_offset = 0;
- cb.buffer_size = array_size;
- rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
- pipe_resource_reference(&cb.buffer, NULL);
}
/* set sample xy locations as array of fragment shader constants */
void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
{
- struct pipe_constant_buffer constbuf = {0};
- float values[4*16] = {0.0f};
int i;
struct pipe_context *ctx = &rctx->b.b;
- assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+ assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
+ assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4);
+
+ memset(rctx->sample_positions, 0, 4 * 4 * 16);
for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
- ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+ ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
/* Also fill in center-zeroed positions used for interpolateAtSample */
- values[4*i + 2] = values[4*i + 0] - 0.5f;
- values[4*i + 3] = values[4*i + 1] - 0.5f;
+ rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
+ rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
}
- constbuf.user_buffer = values;
- constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
- ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
- R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
- pipe_resource_reference(&constbuf.buffer, NULL);
+ rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true;
}
static void update_shader_atom(struct pipe_context *ctx,
@@ -1387,6 +1435,8 @@ static bool r600_update_derived_state(struct r600_context *rctx)
}
}
+ r600_update_driver_const_buffers(rctx);
+
if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
if (!r600_adjust_gprs(rctx)) {
/* discard rendering */