summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2015-08-28 21:08:49 +0200
committer: Marek Olšák <[email protected]> 2015-09-01 21:51:13 +0200
commit: f6a10f60b75821c20ce7cf338b519b92ed0330fc (patch)
tree: d4cfd2d602bb3482fb4d481ff15bdf5a0690ff80 /src/gallium/drivers/radeonsi
parent: 02c8e06497c14bed37dc1780585348bb2675cab6 (diff)
radeonsi: optimize scissor states
- convert 16 states to 1 atom
- only emit 1 scissor if VIEWPORT_INDEX isn't written
- use only one packet when emitting consecutive scissors

Reviewed-by: Alex Deucher <[email protected]>
Acked-by: Christian König <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c | 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_hw_context.c | 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h | 10
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h | 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c | 57
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h | 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c | 20
8 files changed, 79 insertions, 27 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index b7450b6fcec..b2f342f4fa1 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -68,9 +68,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
if (sctx->queued.named.viewport[0]) {
util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
}
- if (sctx->queued.named.scissor[0]) {
- util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
- }
+ util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
(struct pipe_stream_output_target**)sctx->b.streamout.targets);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 563251ddd4b..873a4727976 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -194,6 +194,9 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_all_descriptors_begin_new_cs(ctx);
+ ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+
r600_postflush_resume_features(&ctx->b);
ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 92c6ae3de2b..330b94665b3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -369,7 +369,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 8;
case PIPE_CAP_MAX_VIEWPORTS:
- return 16;
+ return SI_MAX_VIEWPORTS;
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 52167f24a95..9060f948971 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -85,6 +85,8 @@
#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
+#define SI_MAX_VIEWPORTS 16
+
struct si_compute;
struct si_screen {
@@ -127,6 +129,12 @@ struct si_framebuffer {
unsigned export_16bpc;
};
+struct si_scissors {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_scissor_state states[SI_MAX_VIEWPORTS];
+};
+
#define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
struct si_context {
@@ -154,6 +162,7 @@ struct si_context {
struct r600_atom *msaa_config;
struct r600_atom *clip_regs;
struct r600_atom *shader_userdata;
+ struct r600_atom *scissors;
} s;
struct r600_atom *array[0];
} atoms;
@@ -181,6 +190,7 @@ struct si_context {
struct r600_resource *border_color_table;
unsigned border_color_offset;
+ struct si_scissors scissors;
struct r600_atom clip_regs;
struct r600_atom msaa_sample_locs;
struct r600_atom msaa_config;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ad32473b91e..c748f71430b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -278,8 +278,10 @@ static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
return &sctx->gs_shader->info;
else if (sctx->tes_shader)
return &sctx->tes_shader->info;
- else
+ else if (sctx->vs_shader)
return &sctx->vs_shader->info;
+ else
+ return NULL;
}
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 62eda97dc7e..8bd35a8422e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -526,26 +526,50 @@ static void si_set_scissor_states(struct pipe_context *ctx,
const struct pipe_scissor_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_scissor *scissor;
- struct si_pm4_state *pm4;
int i;
- for (i = start_slot; i < start_slot + num_scissors; i++) {
- int idx = i - start_slot;
- int offset = i * 4 * 2;
+ for (i = 0; i < num_scissors; i++)
+ sctx->scissors.states[start_slot + i] = state[i];
- scissor = CALLOC_STRUCT(si_state_scissor);
- if (scissor == NULL)
- return;
- pm4 = &scissor->pm4;
- scissor->scissor = state[idx];
- si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
- S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
- S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
- si_pm4_set_state(sctx, scissor[i], scissor);
+ sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+ si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+}
+
+static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct pipe_scissor_state *states = sctx->scissors.states;
+ unsigned mask = sctx->scissors.dirty_mask;
+
+ /* The simple case: Only 1 viewport is active. */
+ if (mask & 1 &&
+ !si_get_vs_info(sctx)->writes_viewport_index) {
+ r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+ radeon_emit(cs, S_028250_TL_X(states[0].minx) |
+ S_028250_TL_Y(states[0].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(states[0].maxx) |
+ S_028254_BR_Y(states[0].maxy));
+ sctx->scissors.dirty_mask &= ~1; /* clear one bit */
+ return;
+ }
+
+ while (mask) {
+ int start, count, i;
+
+ u_bit_scan_consecutive_range(&mask, &start, &count);
+
+ r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+ start * 4 * 2, count * 2);
+ for (i = start; i < start+count; i++) {
+ radeon_emit(cs, S_028250_TL_X(states[i].minx) |
+ S_028250_TL_Y(states[i].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(states[i].maxx) |
+ S_028254_BR_Y(states[i].maxy));
+ }
}
+ sctx->scissors.dirty_mask = 0;
}
static void si_set_viewport_states(struct pipe_context *ctx,
@@ -2986,6 +3010,7 @@ void si_init_state_functions(struct si_context *sctx)
si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
+ si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
sctx->b.b.create_blend_state = si_create_blend_state;
sctx->b.b.bind_blend_state = si_bind_blend_state;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index bf713c42e86..34dbba48050 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -48,11 +48,6 @@ struct si_state_sample_mask {
uint16_t sample_mask;
};
-struct si_state_scissor {
- struct si_pm4_state pm4;
- struct pipe_scissor_state scissor;
-};
-
struct si_state_viewport {
struct si_pm4_state pm4;
struct pipe_viewport_state viewport;
@@ -96,7 +91,6 @@ union si_state {
struct si_pm4_state *blend_color;
struct si_pm4_state *clip;
struct si_state_sample_mask *sample_mask;
- struct si_state_scissor *scissor[16];
struct si_state_viewport *viewport[16];
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b223e060be3..5a9ef29a549 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -760,6 +760,23 @@ static void *si_create_tes_state(struct pipe_context *ctx,
return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
}
+/**
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ */
+static void si_update_viewports_and_scissors(struct si_context *sctx)
+{
+ struct tgsi_shader_info *info = si_get_vs_info(sctx);
+
+ if (!info || !info->writes_viewport_index)
+ return;
+
+ if (sctx->scissors.dirty_mask)
+ si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+}
+
static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -770,6 +787,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->vs_shader = sel;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
+ si_update_viewports_and_scissors(sctx);
}
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
@@ -787,6 +805,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
if (enable_changed)
si_shader_change_notify(sctx);
+ si_update_viewports_and_scissors(sctx);
}
static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
@@ -821,6 +840,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_shader_change_notify(sctx);
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
+ si_update_viewports_and_scissors(sctx);
}
static void si_make_dummy_ps(struct si_context *sctx)