aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2018-10-10 15:59:29 -0400
committer: Rob Clark <[email protected]> 2018-10-17 12:44:48 -0400
commit: 1b9d69410ce0708f526f5e846e369b781897d10f (patch)
tree: 5a1f46909cee7617824b105f25f6331376df1f75
parent: e8606b11dd5f8a2c8df16a719b0fc8852ceb7977 (diff)
freedreno/a6xx: texture state obj
Unfortunately gallium doesn't match what the hw wants perfectly here, in that it uses a separate CSO for each texture/sampler. So we have to use a hash table to map the collection of textures/samplers to a hw state object.

We probably could use separate hw state objects for texture and sampler state, but mesa/st tends to update the tex and samp state together.

Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_context.c | 2
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_context.h | 3
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 66
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 6
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_texture.c | 169
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_texture.h | 38
6 files changed, 251 insertions, 33 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c
index b82889c7a71..ab10ccb113b 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c
@@ -56,6 +56,8 @@ fd6_context_destroy(struct pipe_context *pctx)
fd_context_cleanup_common_vbos(&fd6_ctx->base);
+ fd6_texture_fini(pctx);
+
free(fd6_ctx);
}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h
index 30cc26001cd..85245c8a65f 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h
@@ -105,6 +105,9 @@ struct fd6_context {
/*{*/
struct fd6_streamout_state tf;
/*}*/
+
+ uint16_t tex_seqno;
+ struct hash_table *tex_cache;
};
static inline struct fd6_context *
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 93f6a267fa9..eb24fb96cfb 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -325,32 +325,32 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
u_upload_unmap(fd6_ctx->border_color_uploader);
}
-static bool
-emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum a6xx_state_block sb, struct fd_texture_stateobj *tex)
+bool
+fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
+ enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
+ unsigned bcolor_offset)
{
bool needs_border = false;
- unsigned bcolor_offset;
- unsigned opcode, tex_samp_reg, tex_const_reg;
+ unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
switch (sb) {
case SB6_VS_TEX:
opcode = CP_LOAD_STATE6_GEOM;
- bcolor_offset = 0;
tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
+ tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
break;
case SB6_FS_TEX:
opcode = CP_LOAD_STATE6_FRAG;
- bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
+ tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
break;
case SB6_CS_TEX:
opcode = CP_LOAD_STATE6_FRAG;
- bcolor_offset = 0;
tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
+ tex_count_reg = 0; //REG_A6XX_SP_CS_TEX_COUNT;
break;
default:
unreachable("bad state block");
@@ -359,8 +359,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (tex->num_samplers > 0) {
struct fd_ringbuffer *state =
- fd_ringbuffer_new_flags(ctx->pipe, tex->num_samplers * 4 * 4,
- FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
+ fd_ringbuffer_new_flags(pipe, tex->num_samplers * 4 * 4,
+ FD_RINGBUFFER_OBJECT);
for (unsigned i = 0; i < tex->num_samplers; i++) {
static const struct fd6_sampler_stateobj dummy_sampler = {};
const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ?
@@ -390,8 +390,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (tex->num_textures > 0) {
struct fd_ringbuffer *state =
- fd_ringbuffer_new_flags(ctx->pipe, tex->num_textures * 16 * 4,
- FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
+ fd_ringbuffer_new_flags(pipe, tex->num_textures * 16 * 4,
+ FD_RINGBUFFER_OBJECT);
for (unsigned i = 0; i < tex->num_textures; i++) {
static const struct fd6_pipe_sampler_view dummy_view = {};
const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
@@ -445,6 +445,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_ringbuffer_del(state);
}
+ if (tex_count_reg) {
+ OUT_PKT4(ring, tex_count_reg, 1);
+ OUT_RING(ring, tex->num_textures);
+ }
+
return needs_border;
}
@@ -931,28 +936,25 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
- if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
- needs_border |= emit_textures(ctx, ring, SB6_VS_TEX,
- &ctx->tex[PIPE_SHADER_VERTEX]);
- OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
- OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures);
- }
+ if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) &&
+ ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) {
+ struct fd6_texture_state *tex = fd6_texture_state(ctx,
+ SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
+
+ needs_border |= tex->needs_border;
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
- needs_border |= emit_textures(ctx, ring, SB6_FS_TEX,
- &ctx->tex[PIPE_SHADER_FRAGMENT]);
- OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
- OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
+ fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7);
}
-#if 0
- OUT_PKT4(ring, REG_A6XX_TPL1_FS_TEX_COUNT, 1);
- OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ?
- ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures);
+ if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) &&
+ ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) {
+ struct fd6_texture_state *tex = fd6_texture_state(ctx,
+ SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
- OUT_PKT4(ring, REG_A6XX_TPL1_CS_TEX_COUNT, 1);
- OUT_RING(ring, 0);
-#endif
+ needs_border |= tex->needs_border;
+
+ fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7);
+ }
if (needs_border)
emit_border_color(ctx, ring);
@@ -988,8 +990,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & FD_DIRTY_SHADER_TEX) {
bool needs_border = false;
- needs_border |= emit_textures(ctx, ring, SB6_CS_TEX,
- &ctx->tex[PIPE_SHADER_COMPUTE]);
+ needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX,
+ &ctx->tex[PIPE_SHADER_COMPUTE], 0);
if (needs_border)
emit_border_color(ctx, ring);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
index 4e27597a70b..005952750f8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@@ -45,6 +45,8 @@ struct fd_ringbuffer;
enum fd6_state_id {
FD6_GROUP_VS_CONST,
FD6_GROUP_FS_CONST,
+ FD6_GROUP_VS_TEX, /* group id used with fd6_emit_add_group() for the cached vertex-shader texture stateobj */
+ FD6_GROUP_FS_TEX, /* group id used with fd6_emit_add_group() for the cached fragment-shader texture stateobj */
};
struct fd6_state_group {
@@ -174,6 +176,10 @@ fd6_stage2shadersb(enum shader_t type)
}
}
+bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
+ enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
+ unsigned bcolor_offset);
+
void fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit);
void fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
index 0f342ae8e28..a48c4ee1ad0 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
@@ -30,9 +30,13 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/hash_table.h"
#include "fd6_texture.h"
#include "fd6_format.h"
+#include "fd6_emit.h"
+
+static void fd6_texture_state_destroy(struct fd6_texture_state *state);
static enum a6xx_tex_clamp
tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border)
@@ -94,6 +98,7 @@ fd6_sampler_state_create(struct pipe_context *pctx,
return NULL;
so->base = *cso;
+ so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
@@ -141,6 +146,28 @@ fd6_sampler_state_create(struct pipe_context *pctx,
}
static void
+fd6_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+ struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+ struct fd6_sampler_stateobj *samp = hwcso;
+ /* Evict every cached texture state whose key references this sampler's seqno, then free the sampler CSO itself. */
+ struct hash_entry *entry;
+ hash_table_foreach(fd6_ctx->tex_cache, entry) {
+ struct fd6_texture_state *state = entry->data;
+ /* NOTE(review): entries are removed while iterating; this relies on hash_table_foreach tolerating removal of the current entry -- confirm against util/hash_table.h. */
+ for (unsigned i = 0; i < ARRAY_SIZE(state->key.samp); i++) {
+ if (samp->seqno == state->key.samp[i].seqno) {
+ fd6_texture_state_destroy(entry->data);
+ _mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
+ break; /* state is gone; stop scanning the remaining slots of this key */
+ }
+ }
+ }
+ /* NOTE(review): unused key slots are zero (the key is memset in fd6_texture_state()), so a sampler whose uint16_t seqno wrapped to 0 would also match them -- over-eviction only. */
+ free(hwcso);
+}
+
+static void
fd6_sampler_states_bind(struct pipe_context *pctx,
enum pipe_shader_type shader, unsigned start,
unsigned nr, void **hwcso)
@@ -215,6 +242,7 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
+ so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno;
so->texconst0 =
A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) |
@@ -310,6 +338,31 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
}
static void
+fd6_sampler_view_destroy(struct pipe_context *pctx,
+ struct pipe_sampler_view *_view)
+{
+ struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+ struct fd6_pipe_sampler_view *view = fd6_pipe_sampler_view(_view);
+ /* Evict every cached texture state whose key references this view's seqno, then drop the resource reference and free the view. */
+ struct hash_entry *entry;
+ hash_table_foreach(fd6_ctx->tex_cache, entry) {
+ struct fd6_texture_state *state = entry->data;
+ /* NOTE(review): entries are removed while iterating; this relies on hash_table_foreach tolerating removal of the current entry -- confirm against util/hash_table.h. */
+ for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) {
+ if (view->seqno == state->key.view[i].seqno) {
+ fd6_texture_state_destroy(entry->data);
+ _mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
+ break; /* state is gone; stop scanning the remaining slots of this key */
+ }
+ }
+ }
+ /* NOTE(review): unused key slots are zero (the key is memset in fd6_texture_state()), so a view whose uint16_t seqno wrapped to 0 would also match them -- over-eviction only. */
+ pipe_resource_reference(&view->base.texture, NULL);
+
+ free(view);
+}
+
+static void
fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
unsigned start, unsigned nr,
struct pipe_sampler_view **views)
@@ -337,11 +390,127 @@ fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
}
}
+
+static uint32_t
+key_hash(const void *_key)
+{ /* Hash callback registered for tex_cache: 32-bit FNV-1a over all sizeof(struct fd6_texture_key) raw bytes of the key. */
+ const struct fd6_texture_key *key = _key;
+ uint32_t hash = _mesa_fnv32_1a_offset_bias;
+ hash = _mesa_fnv32_1a_accumulate_block(hash, key, sizeof(*key)); /* raw bytes are hashed, so keys must be fully zero-initialized before being filled in */
+ return hash;
+}
+
+static bool
+key_equals(const void *_a, const void *_b)
+{ /* Equality callback registered for tex_cache: two keys are equal when all their bytes match. */
+ const struct fd6_texture_key *a = _a;
+ const struct fd6_texture_key *b = _b;
+ return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0; /* bytewise compare, consistent with key_hash() hashing the raw bytes */
+}
+
+struct fd6_texture_state *
+fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb,
+ struct fd_texture_stateobj *tex)
+{
+ struct fd6_context *fd6_ctx = fd6_context(ctx);
+ struct fd6_texture_key key;
+ bool needs_border = false;
+ /* Return the cached texture state for the views/samplers currently in 'tex', building and caching a new one on a miss. */
+ memset(&key, 0, sizeof(key));
+ /* Key part 1: seqno of each bound sampler view and of its resource; unbound slots stay zero. */
+ for (unsigned i = 0; i < tex->num_textures; i++) {
+ if (!tex->textures[i])
+ continue;
+
+ struct fd6_pipe_sampler_view *view =
+ fd6_pipe_sampler_view(tex->textures[i]);
+
+ key.view[i].rsc_seqno = fd_resource(view->base.texture)->seqno;
+ key.view[i].seqno = view->seqno;
+ }
+ /* Key part 2: seqno of each bound sampler; also accumulate whether any sampler needs a border color. */
+ for (unsigned i = 0; i < tex->num_samplers; i++) {
+ if (!tex->samplers[i])
+ continue;
+
+ struct fd6_sampler_stateobj *sampler =
+ fd6_sampler_stateobj(tex->samplers[i]);
+
+ key.samp[i].seqno = sampler->seqno;
+
+ needs_border |= sampler->needs_border;
+ }
+
+ /* This will need update for HS/DS/GS: */
+ if (unlikely(needs_border && (sb == SB6_FS_TEX))) {
+ /* TODO we could probably use fixed offsets for each shader
+ * stage and avoid the need for # of VS samplers to be part
+ * of the FS tex state.. but I don't think our handling of
+ * BCOLOR_OFFSET is actually correct, and trying to use a
+ * hard coded offset of 16 breaks things.
+ *
+ * Note that when this changes, then a corresponding change
+ * in emit_border_color() is also needed.
+ */
+ key.bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers;
+ }
+ /* Look the key up using a hash computed once; the same hash is reused for the insert below. */
+ uint32_t hash = key_hash(&key);
+ struct hash_entry *entry =
+ _mesa_hash_table_search_pre_hashed(fd6_ctx->tex_cache, hash, &key);
+
+ if (entry) {
+ return entry->data;
+ }
+ /* Cache miss: allocate a new state and emit the texture/sampler state into its own ringbuffer object. */
+ struct fd6_texture_state *state = CALLOC_STRUCT(fd6_texture_state);
+ /* NOTE(review): the CALLOC_STRUCT result is dereferenced without a NULL check -- presumably it can fail on OOM; confirm and handle if so. */
+ state->key = key;
+ state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+ state->needs_border = needs_border;
+
+ fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset);
+
+ /* NOTE: uses copy of key in state obj, because pointer passed by caller
+ * is probably on the stack
+ */
+ _mesa_hash_table_insert_pre_hashed(fd6_ctx->tex_cache, hash,
+ &state->key, state);
+ /* The cache owns the returned state; it is released via fd6_texture_state_destroy(). */
+ return state;
+}
+
+static void
+fd6_texture_state_destroy(struct fd6_texture_state *state)
+{ /* Release the state's ringbuffer object and free the state; does not touch the tex_cache entry, callers remove that themselves. */
+ fd_ringbuffer_del(state->stateobj);
+ free(state);
+}
+
void
fd6_texture_init(struct pipe_context *pctx)
{
+ struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+ /* Sampler-state hooks; the delete hook also evicts cached texture states that reference the sampler. */
pctx->create_sampler_state = fd6_sampler_state_create;
+ pctx->delete_sampler_state = fd6_sampler_state_delete;
pctx->bind_sampler_states = fd6_sampler_states_bind;
+ /* Sampler-view hooks; the destroy hook also evicts cached texture states that reference the view. */
pctx->create_sampler_view = fd6_sampler_view_create;
+ pctx->sampler_view_destroy = fd6_sampler_view_destroy;
pctx->set_sampler_views = fd6_set_sampler_views;
+ /* Cache mapping struct fd6_texture_key -> struct fd6_texture_state; torn down in fd6_texture_fini(). */
+ fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, key_hash, key_equals);
+}
+
+void
+fd6_texture_fini(struct pipe_context *pctx)
+{
+ struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx));
+ /* Tear down the texture-state cache: release every cached state first, then the table itself. */
+ struct hash_entry *entry;
+ hash_table_foreach(fd6_ctx->tex_cache, entry) {
+ fd6_texture_state_destroy(entry->data);
+ }
+ _mesa_hash_table_destroy(fd6_ctx->tex_cache, NULL); /* matching destructor for _mesa_hash_table_create(); NULL callback because the entries were already released above */
}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h
index a45ed6b3a7b..576afaafdb4 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h
@@ -41,6 +41,7 @@ struct fd6_sampler_stateobj {
uint32_t texsamp0, texsamp1, texsamp2, texsamp3;
bool saturate_s, saturate_t, saturate_r;
bool needs_border;
+ uint16_t seqno;
};
static inline struct fd6_sampler_stateobj *
@@ -55,6 +56,7 @@ struct fd6_pipe_sampler_view {
uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11;
uint32_t offset;
bool astc_srgb;
+ uint16_t seqno;
};
static inline struct fd6_pipe_sampler_view *
@@ -64,7 +66,7 @@ fd6_pipe_sampler_view(struct pipe_sampler_view *pview)
}
void fd6_texture_init(struct pipe_context *pctx);
-
+void fd6_texture_fini(struct pipe_context *pctx);
static inline enum a6xx_tex_type
fd6_tex_type(unsigned target)
@@ -88,4 +90,38 @@ fd6_tex_type(unsigned target)
}
}
+/*
+ * Texture stateobj:
+ *
+ * The sampler and sampler-view state is mapped to a single hardware
+ * stateobj which can be emit'd as a pointer in a CP_SET_DRAW_STATE
+ * packet, to avoid the overhead of re-generating the entire cmdstream
+ * when application toggles thru multiple different texture states.
+ */
+
+struct fd6_texture_key {
+ struct {
+ /* We need to track the seqno of the rsc as well as of the
+ * sampler view, because resource shadowing/etc can result
+ * in the underlying bo changing (which means the previously
+ * built state is no longer valid).
+ */
+ uint16_t rsc_seqno;
+ uint16_t seqno;
+ } view[16]; /* one slot per sampler view; a slot left all-zero means no view is bound there */
+ struct {
+ uint16_t seqno;
+ } samp[16]; /* one slot per sampler; a slot left zero means no sampler is bound there */
+ uint8_t bcolor_offset; /* number of VS samplers, set only for FS state that needs a border color; otherwise 0 */
+};
+
+struct fd6_texture_state {
+ struct fd6_texture_key key; /* copy of the lookup key; the hash table entry points at this copy */
+ struct fd_ringbuffer *stateobj; /* ringbuffer object holding the emitted texture/sampler state */
+ bool needs_border; /* true when any sampler captured in the key needs a border color */
+};
+
+struct fd6_texture_state * fd6_texture_state(struct fd_context *ctx,
+ enum a6xx_state_block sb, struct fd_texture_stateobj *tex);
+
#endif /* FD6_TEXTURE_H_ */