aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2013-09-06 18:21:25 -0400
committerRob Clark <[email protected]>2013-09-14 13:31:58 -0400
commit1a42d4ee34d73cbc3e5bff3dcce5a913cd58aaba (patch)
tree83ed06325e99a8b814670662ad4d625fd450cf9a /src/gallium
parent6e9c386d16b3b38be6d1496758ef983b64744844 (diff)
freedreno/a3xx: use INDIRECT state load for shaders
With a debug option to force DIRECT (mainly to make it easier to capture cmdstream dumps). Using INDIRECT for large shaders at least makes a noticeable reduction in CPU load, which helps for CPU-limited games. Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c35
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c1
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h1
3 files changed, 29 insertions, 8 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index d84bbe9c36f..b0eec6e66d3 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -186,7 +186,8 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
{
struct ir3_shader_info *si = &so->info;
enum adreno_state_block sb;
- uint32_t i, *bin;
+ enum adreno_state_src src;
+ uint32_t i, sz, *bin;
if (so->type == SHADER_VERTEX) {
sb = SB_VERT_SHADER;
@@ -194,17 +195,31 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
sb = SB_FRAG_SHADER;
}
- // XXX use SS_INDIRECT
- bin = fd_bo_map(so->bo);
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords);
+ if (fd_mesa_debug & FD_DBG_DIRECT) {
+ sz = si->sizedwords;
+ src = SS_DIRECT;
+ bin = fd_bo_map(so->bo);
+ } else {
+ sz = 0;
+ src = SS_INDIRECT;
+ bin = NULL;
+ }
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
- OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
- CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- for (i = 0; i < si->sizedwords; i++)
+ if (bin) {
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+ } else {
+ OUT_RELOC(ring, so->bo, 0,
+ CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+ }
+ for (i = 0; i < sz; i++) {
OUT_RING(ring, bin[i]);
+ }
}
void
@@ -223,6 +238,10 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
+ * flush some caches? I think we only need to set those
+ * bits if we have updated const or shader..
+ */
A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 7412e3dca96..eada1af9892 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -61,6 +61,7 @@ static const struct debug_named_value debug_options[] = {
{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},
{"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"},
{"dscis", FD_DBG_DSCIS, "Disable scissor optimization"},
+ {"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index f8672339cff..4c7c78b955d 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -57,6 +57,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_DCLEAR 0x04
#define FD_DBG_DGMEM 0x08
#define FD_DBG_DSCIS 0x10
+#define FD_DBG_DIRECT 0x20
extern int fd_mesa_debug;
#define DBG(fmt, ...) \