summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2014-07-01 14:42:42 -0700
committerEric Anholt <[email protected]>2014-08-08 18:59:46 -0700
commitcf2d777fbe7fd87f9076d4bc0ad639b33e7ffb70 (patch)
treeb9e6ddfc4cc60ace7115107fba1ef552027cc9c2
parent6cf86dd48786b08991714c338c6b506dde5f75bb (diff)
vc4: Add shader variant caching to handle FS output swizzle.
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h11
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c284
3 files changed, 232 insertions, 65 deletions
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index dedd98b4e5b..8258d3046c0 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -72,6 +72,9 @@ struct vc4_shader_uniform_info {
struct vc4_shader_state {
struct pipe_shader_state base;
+};
+
+struct vc4_compiled_shader {
struct vc4_bo *bo;
struct vc4_shader_uniform_info uniforms[2];
@@ -80,7 +83,8 @@ struct vc4_shader_state {
};
struct vc4_program_stateobj {
- struct vc4_shader_state *vs, *fs;
+ struct vc4_shader_state *bind_vs, *bind_fs;
+ struct vc4_compiled_shader *vs, *fs;
uint32_t dirty;
uint8_t num_exports;
/* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
@@ -138,6 +142,8 @@ struct vc4_context {
struct primconvert_context *primconvert;
+ struct util_hash_table *fs_cache, *vs_cache;
+
/** @{ Current pipeline state objects */
struct pipe_scissor_state scissor;
struct pipe_blend_state *blend;
@@ -188,7 +194,7 @@ void vc4_simulator_flush(struct vc4_context *vc4,
void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size);
void vc4_get_uniform_bo(struct vc4_context *vc4,
- struct vc4_shader_state *shader,
+ struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
int shader_index, struct vc4_bo **out_bo,
uint32_t *out_offset);
@@ -196,5 +202,6 @@ void vc4_get_uniform_bo(struct vc4_context *vc4,
void vc4_flush(struct pipe_context *pctx);
void vc4_emit_state(struct pipe_context *pctx);
void vc4_generate_code(struct qcompile *c);
+void vc4_update_compiled_shaders(struct vc4_context *vc4);
#endif /* VC4_CONTEXT_H */
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 1d7956d1d4f..10b5deb560e 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -98,6 +98,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
"tile_state");
}
+ vc4_update_compiled_shaders(vc4);
+
vc4->needs_flush = true;
// Tile state data is 48 bytes per tile, I think it can be thrown away
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 8d71212a69f..bc66ecc3446 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -25,6 +25,9 @@
#include <stdio.h>
#include <inttypes.h>
#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_hash_table.h"
+#include "util/u_hash.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
@@ -43,11 +46,29 @@ struct tgsi_to_qir {
struct qreg *consts;
uint32_t num_consts;
+ struct vc4_shader_state *shader_state;
+ struct vc4_fs_key *fs_key;
+ struct vc4_vs_key *vs_key;
+
uint32_t *uniform_data;
enum quniform_contents *uniform_contents;
uint32_t num_uniforms;
};
+struct vc4_key {
+ struct vc4_shader_state *shader_state;
+};
+
+struct vc4_fs_key {
+ struct vc4_key base;
+ enum pipe_format color_format;
+};
+
+struct vc4_vs_key {
+ struct vc4_key base;
+ enum pipe_format attr_formats[8];
+};
+
static struct qreg
get_temp_for_uniform(struct tgsi_to_qir *trans, uint32_t uniform)
{
@@ -323,7 +344,7 @@ parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
}
static void
-emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_frag_init(struct tgsi_to_qir *trans)
{
/* XXX: lols */
for (int i = 0; i < 4; i++) {
@@ -333,7 +354,7 @@ emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
}
static void
-emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_vert_init(struct tgsi_to_qir *trans)
{
struct qcompile *c = trans->c;
@@ -346,7 +367,7 @@ emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
}
static void
-emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_coord_init(struct tgsi_to_qir *trans)
{
struct qcompile *c = trans->c;
@@ -359,16 +380,27 @@ emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
}
static void
-emit_frag_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_frag_end(struct tgsi_to_qir *trans)
{
struct qcompile *c = trans->c;
struct qreg t = qir_get_temp(c);
+
+ const struct util_format_description *format_desc =
+ util_format_description(trans->fs_key->color_format);
+
+ struct qreg swizzled_outputs[4] = {
+ trans->outputs[format_desc->swizzle[0]],
+ trans->outputs[format_desc->swizzle[1]],
+ trans->outputs[format_desc->swizzle[2]],
+ trans->outputs[format_desc->swizzle[3]],
+ };
+
qir_emit(c, qir_inst4(QOP_PACK_COLORS, t,
- trans->outputs[0],
- trans->outputs[1],
- trans->outputs[2],
- trans->outputs[3]));
+ swizzled_outputs[0],
+ swizzled_outputs[1],
+ swizzled_outputs[2],
+ swizzled_outputs[3]));
qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
t, c->undef));
}
@@ -409,7 +441,7 @@ emit_1_wc_write(struct tgsi_to_qir *trans)
}
static void
-emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_vert_end(struct tgsi_to_qir *trans)
{
emit_scaled_viewport_write(trans);
emit_zs_write(trans);
@@ -418,7 +450,7 @@ emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
}
static void
-emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+emit_coord_end(struct tgsi_to_qir *trans)
{
struct qcompile *c = trans->c;
@@ -431,7 +463,8 @@ emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
}
static struct tgsi_to_qir *
-vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
+vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage,
+ struct vc4_key *key)
{
struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir);
struct qcompile *c;
@@ -451,24 +484,28 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
trans->uniform_data = calloc(sizeof(uint32_t), 1024);
trans->uniform_contents = calloc(sizeof(enum quniform_contents), 1024);
+ trans->shader_state = key->shader_state;
trans->c = c;
- ret = tgsi_parse_init(&trans->parser, so->base.tokens);
+ ret = tgsi_parse_init(&trans->parser, trans->shader_state->base.tokens);
assert(ret == TGSI_PARSE_OK);
if (vc4_debug & VC4_DEBUG_TGSI) {
fprintf(stderr, "TGSI:\n");
- tgsi_dump(so->base.tokens, 0);
+ tgsi_dump(trans->shader_state->base.tokens, 0);
}
switch (stage) {
case QSTAGE_FRAG:
- emit_frag_init(trans, so);
+ trans->fs_key = (struct vc4_fs_key *)key;
+ emit_frag_init(trans);
break;
case QSTAGE_VERT:
- emit_vert_init(trans, so);
+ trans->vs_key = (struct vc4_vs_key *)key;
+ emit_vert_init(trans);
break;
case QSTAGE_COORD:
- emit_coord_init(trans, so);
+ trans->vs_key = (struct vc4_vs_key *)key;
+ emit_coord_init(trans);
break;
}
@@ -490,13 +527,13 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
switch (stage) {
case QSTAGE_FRAG:
- emit_frag_end(trans, so);
+ emit_frag_end(trans);
break;
case QSTAGE_VERT:
- emit_vert_end(trans, so);
+ emit_vert_end(trans);
break;
case QSTAGE_COORD:
- emit_coord_end(trans, so);
+ emit_coord_end(trans);
break;
}
@@ -520,7 +557,7 @@ vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
return trans;
}
-static struct vc4_shader_state *
+static void *
vc4_shader_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
@@ -534,12 +571,12 @@ vc4_shader_state_create(struct pipe_context *pctx,
}
static void
-copy_uniform_state_to_shader(struct vc4_shader_state *so,
+copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
int shader_index,
struct tgsi_to_qir *trans)
{
int count = trans->num_uniforms;
- struct vc4_shader_uniform_info *uinfo = &so->uniforms[shader_index];
+ struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
uinfo->count = count;
uinfo->data = malloc(count * sizeof(*uinfo->data));
@@ -550,71 +587,187 @@ copy_uniform_state_to_shader(struct vc4_shader_state *so,
count * sizeof(*uinfo->contents));
}
-static void *
-vc4_fs_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+static void
+vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+ struct vc4_fs_key *key)
{
- struct vc4_context *vc4 = vc4_context(pctx);
- struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso);
- if (!so)
- return NULL;
-
- struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(so, QSTAGE_FRAG);
- copy_uniform_state_to_shader(so, 0, trans);
+ struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG,
+ &key->base);
+ copy_uniform_state_to_shader(shader, 0, trans);
- so->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
- trans->c->num_qpu_insts * sizeof(uint64_t),
- "fs_code");
+ shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
+ trans->c->num_qpu_insts * sizeof(uint64_t),
+ "fs_code");
qir_compile_destroy(trans->c);
free(trans);
-
- return so;
}
-static void *
-vc4_vs_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+static void
+vc4_vs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+ struct vc4_vs_key *key)
{
- struct vc4_context *vc4 = vc4_context(pctx);
- struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso);
- if (!so)
- return NULL;
-
- struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_VERT);
- copy_uniform_state_to_shader(so, 0, vs_trans);
+ struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(shader,
+ QSTAGE_VERT,
+ &key->base);
+ copy_uniform_state_to_shader(shader, 0, vs_trans);
- struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_COORD);
- copy_uniform_state_to_shader(so, 1, cs_trans);
+ struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(shader,
+ QSTAGE_COORD,
+ &key->base);
+ copy_uniform_state_to_shader(shader, 1, cs_trans);
uint32_t vs_size = vs_trans->c->num_qpu_insts * sizeof(uint64_t);
uint32_t cs_size = cs_trans->c->num_qpu_insts * sizeof(uint64_t);
- so->coord_shader_offset = vs_size; /* XXX: alignment? */
- so->bo = vc4_bo_alloc(vc4->screen,
- so->coord_shader_offset + cs_size,
- "vs_code");
+ shader->coord_shader_offset = vs_size; /* XXX: alignment? */
+ shader->bo = vc4_bo_alloc(vc4->screen,
+ shader->coord_shader_offset + cs_size,
+ "vs_code");
- void *map = vc4_bo_map(so->bo);
+ void *map = vc4_bo_map(shader->bo);
memcpy(map, vs_trans->c->qpu_insts, vs_size);
- memcpy(map + so->coord_shader_offset, cs_trans->c->qpu_insts, cs_size);
+ memcpy(map + shader->coord_shader_offset,
+ cs_trans->c->qpu_insts, cs_size);
qir_compile_destroy(vs_trans->c);
qir_compile_destroy(cs_trans->c);
+}
- return so;
+static void
+vc4_update_compiled_fs(struct vc4_context *vc4)
+{
+ struct vc4_fs_key local_key;
+ struct vc4_fs_key *key = &local_key;
+
+ memset(key, 0, sizeof(*key));
+ key->base.shader_state = vc4->prog.bind_fs;
+
+ if (vc4->framebuffer.cbufs[0])
+ key->color_format = vc4->framebuffer.cbufs[0]->format;
+
+ vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
+ if (vc4->prog.fs)
+ return;
+
+ key = malloc(sizeof(*key));
+ memcpy(key, &local_key, sizeof(*key));
+
+ struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
+ vc4_fs_compile(vc4, shader, key);
+ util_hash_table_set(vc4->fs_cache, key, shader);
+
+ vc4->prog.fs = shader;
+}
+
+static void
+vc4_update_compiled_vs(struct vc4_context *vc4)
+{
+ struct vc4_vs_key local_key;
+ struct vc4_vs_key *key = &local_key;
+
+ memset(key, 0, sizeof(*key));
+ key->base.shader_state = vc4->prog.bind_vs;
+
+ vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
+ if (vc4->prog.vs)
+ return;
+
+ key = malloc(sizeof(*key));
+ memcpy(key, &local_key, sizeof(*key));
+
+ struct vc4_compiled_shader *shader = CALLOC_STRUCT(vc4_compiled_shader);
+ vc4_vs_compile(vc4, shader, key);
+ util_hash_table_set(vc4->vs_cache, key, shader);
+
+ vc4->prog.vs = shader;
+}
+
+void
+vc4_update_compiled_shaders(struct vc4_context *vc4)
+{
+ vc4_update_compiled_fs(vc4);
+ vc4_update_compiled_vs(vc4);
+}
+
+static unsigned
+fs_cache_hash(void *key)
+{
+ return util_hash_crc32(key, sizeof(struct vc4_fs_key));
+}
+
+static unsigned
+vs_cache_hash(void *key)
+{
+ return util_hash_crc32(key, sizeof(struct vc4_vs_key));
+}
+
+static int
+fs_cache_compare(void *key1, void *key2)
+{
+ return memcmp(key1, key2, sizeof(struct vc4_fs_key));
+}
+
+static int
+vs_cache_compare(void *key1, void *key2)
+{
+ return memcmp(key1, key2, sizeof(struct vc4_vs_key));
+}
+
+struct delete_state {
+ struct vc4_context *vc4;
+ struct vc4_shader_state *shader_state;
+};
+
+static enum pipe_error
+fs_delete_from_cache(void *in_key, void *in_value, void *data)
+{
+ struct delete_state *del = data;
+ struct vc4_fs_key *key = in_key;
+ struct vc4_compiled_shader *shader = in_value;
+
+ if (key->base.shader_state == data) {
+ util_hash_table_remove(del->vc4->fs_cache, key);
+ vc4_bo_unreference(&shader->bo);
+ free(shader);
+ }
+
+ return 0;
+}
+
+static enum pipe_error
+vs_delete_from_cache(void *in_key, void *in_value, void *data)
+{
+ struct delete_state *del = data;
+ struct vc4_vs_key *key = in_key;
+ struct vc4_compiled_shader *shader = in_value;
+
+ if (key->base.shader_state == data) {
+ util_hash_table_remove(del->vc4->vs_cache, key);
+ vc4_bo_unreference(&shader->bo);
+ free(shader);
+ }
+
+ return 0;
}
static void
vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
- struct pipe_shader_state *so = hwcso;
+ struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_shader_state *so = hwcso;
+ struct delete_state del;
+
+ del.vc4 = vc4;
+ del.shader_state = so;
+ util_hash_table_foreach(vc4->fs_cache, fs_delete_from_cache, &del);
+ util_hash_table_foreach(vc4->vs_cache, vs_delete_from_cache, &del);
- free((void *)so->tokens);
+ free((void *)so->base.tokens);
free(so);
}
void
-vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_shader_state *shader,
+vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
int shader_index, struct vc4_bo **out_bo,
uint32_t *out_offset)
@@ -653,7 +806,7 @@ static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct vc4_context *vc4 = vc4_context(pctx);
- vc4->prog.fs = hwcso;
+ vc4->prog.bind_fs = hwcso;
vc4->prog.dirty |= VC4_SHADER_DIRTY_FP;
vc4->dirty |= VC4_DIRTY_PROG;
}
@@ -662,7 +815,7 @@ static void
vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct vc4_context *vc4 = vc4_context(pctx);
- vc4->prog.vs = hwcso;
+ vc4->prog.bind_vs = hwcso;
vc4->prog.dirty |= VC4_SHADER_DIRTY_VP;
vc4->dirty |= VC4_DIRTY_PROG;
}
@@ -670,12 +823,17 @@ vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso)
void
vc4_program_init(struct pipe_context *pctx)
{
- pctx->create_vs_state = vc4_vs_state_create;
+ struct vc4_context *vc4 = vc4_context(pctx);
+
+ pctx->create_vs_state = vc4_shader_state_create;
pctx->delete_vs_state = vc4_shader_state_delete;
- pctx->create_fs_state = vc4_fs_state_create;
+ pctx->create_fs_state = vc4_shader_state_create;
pctx->delete_fs_state = vc4_shader_state_delete;
pctx->bind_fs_state = vc4_fp_state_bind;
pctx->bind_vs_state = vc4_vp_state_bind;
+
+ vc4->fs_cache = util_hash_table_create(fs_cache_hash, fs_cache_compare);
+ vc4->vs_cache = util_hash_table_create(vs_cache_hash, vs_cache_compare);
}