summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2014-10-14 16:23:18 -0400
committer Rob Clark <[email protected]> 2014-10-15 15:49:48 -0400
commit 368466b7b72aed74b917aeb3225d7a0a7101678c (patch)
tree 204b333fa07505888f4aa720af02650d7037304b
parent d595987ea3d1706fecb9f6416031ec8b27c95a9e (diff)
freedreno/ir3: optimize shader key comparison
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_context.h 3
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_draw.c 21
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_texture.c 10
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.c 27
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.h 58
5 files changed, 79 insertions, 40 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 324edb2eb80..77e4605e550 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -92,6 +92,9 @@ struct fd3_context {
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
+ /* if *any* of bits are set in {v,f}saturate_{s,t,r} */
+ bool vsaturate, fsaturate;
+
/* bitmask of sampler which needs coords clamped for vertex
* shader:
*/
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index ccedb391fed..7cc24e598e2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -82,18 +82,20 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct ir3_shader_key *last_key = &fd3_ctx->last_key;
- if (memcmp(last_key, key, sizeof(*key))) {
+ if (!ir3_shader_key_equal(last_key, key)) {
ctx->dirty |= FD_DIRTY_PROG;
- if ((last_key->vsaturate_s != key->vsaturate_s) ||
- (last_key->vsaturate_t != key->vsaturate_t) ||
- (last_key->vsaturate_r != key->vsaturate_r))
- ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+ if (last_key->has_per_samp || key->has_per_samp) {
+ if ((last_key->vsaturate_s != key->vsaturate_s) ||
+ (last_key->vsaturate_t != key->vsaturate_t) ||
+ (last_key->vsaturate_r != key->vsaturate_r))
+ ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
- if ((last_key->fsaturate_s != key->fsaturate_s) ||
- (last_key->fsaturate_t != key->fsaturate_t) ||
- (last_key->fsaturate_r != key->fsaturate_r))
- ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+ if ((last_key->fsaturate_s != key->fsaturate_s) ||
+ (last_key->fsaturate_t != key->fsaturate_t) ||
+ (last_key->fsaturate_r != key->fsaturate_r))
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+ }
if (last_key->color_two_side != key->color_two_side)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
@@ -124,6 +126,7 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+ .has_per_samp = fd3_ctx->fsaturate || fd3_ctx->vsaturate,
.vsaturate_s = fd3_ctx->vsaturate_s,
.vsaturate_t = fd3_ctx->vsaturate_t,
.vsaturate_r = fd3_ctx->vsaturate_r,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index d70b39e2114..39befef7672 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -143,7 +143,7 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
{
struct fd_context *ctx = fd_context(pctx);
struct fd3_context *fd3_ctx = fd3_context(ctx);
- unsigned saturate_s = 0, saturate_t = 0, saturate_r = 0;
+ uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
unsigned i;
for (i = 0; i < nr; i++) {
@@ -162,10 +162,18 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
if (shader == PIPE_SHADER_FRAGMENT) {
+ fd3_ctx->fsaturate =
+ (saturate_s != 0) ||
+ (saturate_t != 0) ||
+ (saturate_r != 0);
fd3_ctx->fsaturate_s = saturate_s;
fd3_ctx->fsaturate_t = saturate_t;
fd3_ctx->fsaturate_r = saturate_r;
} else if (shader == PIPE_SHADER_VERTEX) {
+ fd3_ctx->vsaturate =
+ (saturate_s != 0) ||
+ (saturate_t != 0) ||
+ (saturate_r != 0);
fd3_ctx->vsaturate_s = saturate_s;
fd3_ctx->vsaturate_t = saturate_t;
fd3_ctx->vsaturate_r = saturate_r;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index c77cec10cc7..1f7e869d9f3 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -182,23 +182,30 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
* so normalize the key to avoid constructing multiple identical
* variants:
*/
- if (shader->type == SHADER_FRAGMENT) {
+ switch (shader->type) {
+ case SHADER_FRAGMENT:
+ case SHADER_COMPUTE:
key.binning_pass = false;
- key.vsaturate_s = 0;
- key.vsaturate_t = 0;
- key.vsaturate_r = 0;
- }
- if (shader->type == SHADER_VERTEX) {
+ if (key.has_per_samp) {
+ key.vsaturate_s = 0;
+ key.vsaturate_t = 0;
+ key.vsaturate_r = 0;
+ }
+ break;
+ case SHADER_VERTEX:
key.color_two_side = false;
key.half_precision = false;
key.alpha = false;
- key.fsaturate_s = 0;
- key.fsaturate_t = 0;
- key.fsaturate_r = 0;
+ if (key.has_per_samp) {
+ key.fsaturate_s = 0;
+ key.fsaturate_t = 0;
+ key.fsaturate_r = 0;
+ }
+ break;
}
for (v = shader->variants; v; v = v->next)
- if (!memcmp(&key, &v->key, sizeof(key)))
+ if (ir3_shader_key_equal(&key, &v->key))
return v;
/* compile new variant if it doesn't exist already: */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index c531ad704cc..628c09e1be3 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -54,36 +54,54 @@ static inline uint16_t sem2idx(ir3_semantic sem)
* in hw (two sided color), binning-pass vertex shader, etc.
*/
struct ir3_shader_key {
+ union {
+ struct {
+ /* do we need to check {v,f}saturate_{s,t,r}? */
+ unsigned has_per_samp : 1;
+
+ /*
+ * Vertex shader variant parameters:
+ */
+ unsigned binning_pass : 1;
+
+ /*
+ * Fragment shader variant parameters:
+ */
+ unsigned color_two_side : 1;
+ unsigned half_precision : 1;
+ /* For rendering to alpha, we need a bit of special handling
+ * since the hw always takes gl_FragColor starting from x
+ * component, rather than figuring out to take the w component.
+ * We could be more clever and generate variants for other
+ * render target formats (ie. luminance formats are xxx1), but
+ * let's start with this and see how it goes:
+ */
+ unsigned alpha : 1;
+ };
+ uint32_t global;
+ };
+
/* bitmask of sampler which needs coords clamped for vertex
* shader:
*/
- unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+ uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
/* bitmask of sampler which needs coords clamped for frag
* shader:
*/
- unsigned fsaturate_s, fsaturate_t, fsaturate_r;
-
- /*
- * Vertex shader variant parameters:
- */
- unsigned binning_pass : 1;
+ uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
- /*
- * Fragment shader variant parameters:
- */
- unsigned color_two_side : 1;
- unsigned half_precision : 1;
- /* For rendering to alpha, we need a bit of special handling
- * since the hw always takes gl_FragColor starting from x
- * component, rather than figuring out to take the w component.
- * We could be more clever and generate variants for other
- * render target formats (ie. luminance formats are xxx1), but
- * let's start with this and see how it goes:
- */
- unsigned alpha : 1;
};
+static inline bool
+ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
+{
+ /* slow-path if we need to check {v,f}saturate_{s,t,r} */
+ if (a->has_per_samp || b->has_per_samp)
+ return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0;
+ return a->global == b->global;
+}
+
struct ir3_shader_variant {
struct fd_bo *bo;