summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-07-14 17:26:43 -0700
committer: Eric Anholt <[email protected]> 2016-07-15 13:54:00 -0700
commit: 3bcd0f1912a60cc9d3813923d18d29465e41ff56 (patch)
tree: a453cbfb302a1e70b0704c3725f9ce6e5172db0d /src/gallium
parent: 88152d7dc0e2cf233cd2c38a4e9affb1ea73fa97 (diff)
vc4: Speed up glGenerateMipmap() by avoiding shadow baselevel.
To support a general GL_TEXTURE_BASE_LEVEL we have to copy to a temporary miptree. However, if only a single level is being selected (first_level == last_level), we can use the existing miptree and force all sampling to come from that particular level. This avoids a ton of software fallbacks in glGenerateMipmap(), whose blit implementation in gallium uses base levels. Improves "glmark2 -b terrain" from 2 fps to 3 (perhaps some more precision would be useful?), and cuts its CPU usage during the benchmark from ~30% to ~10% (total CPU time from 8.8s to 7.6s).
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.h 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 11
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_resource.c 4
-rw-r--r-- src/gallium/drivers/vc4/vc4_state.c 9
5 files changed, 23 insertions, 3 deletions
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 7da2b554e49..751f0437807 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -74,6 +74,7 @@ struct vc4_sampler_view {
struct pipe_sampler_view base;
uint32_t texture_p0;
uint32_t texture_p1;
+ bool force_first_level;
};
struct vc4_sampler_state {
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index b4b62e3869a..4ee49a258f1 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -393,6 +393,12 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
}
}
+ if (c->key->tex[unit].forced_first_level) {
+ lod = qir_uniform_f(c, c->key->tex[unit].forced_first_level);
+ is_txl = true;
+ is_txb = false;
+ }
+
struct qreg texture_u[] = {
qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
@@ -2313,6 +2319,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
{
for (int i = 0; i < texstate->num_textures; i++) {
struct pipe_sampler_view *sampler = texstate->textures[i];
+ struct vc4_sampler_view *vc4_sampler = vc4_sampler_view(sampler);
struct pipe_sampler_state *sampler_state =
texstate->samplers[i];
@@ -2333,6 +2340,10 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
key->tex[i].compare_func = sampler_state->compare_func;
key->tex[i].wrap_s = sampler_state->wrap_s;
key->tex[i].wrap_t = sampler_state->wrap_t;
+ if (vc4_sampler->force_first_level) {
+ key->tex[i].forced_first_level =
+ sampler->u.tex.first_level;
+ }
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 88eda225d80..81b55651cec 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -314,6 +314,7 @@ struct vc4_key {
unsigned compare_func:3;
unsigned wrap_s:3;
unsigned wrap_t:3;
+ unsigned forced_first_level:8;
};
struct {
uint16_t msaa_width, msaa_height;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index a07fa3d9979..08d7d207f79 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -877,7 +877,9 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
if (shadow->writes == orig->writes && orig->bo->private)
return;
- perf_debug("Updating shadow texture due to %s\n",
+ perf_debug("Updating %dx%d@%d shadow texture due to %s\n",
+ orig->base.b.width0, orig->base.b.height0,
+ view->u.tex.first_level,
view->u.tex.first_level ? "base level" : "raster layout");
for (int i = 0; i <= shadow->base.b.last_level; i++) {
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index bf4e0232daf..df9e1a3ab03 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -603,7 +603,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
* Also, Raspberry Pi doesn't support sampling from raster textures,
* so we also have to copy to a temporary then.
*/
- if (cso->u.tex.first_level ||
+ if ((cso->u.tex.first_level &&
+ (cso->u.tex.first_level != cso->u.tex.last_level)) ||
rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
struct vc4_resource *shadow_parent = vc4_resource(prsc);
struct pipe_resource tmpl = shadow_parent->base.b;
@@ -626,6 +627,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
clone->writes = shadow_parent->writes - 1;
assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
+ } else if (cso->u.tex.first_level) {
+ so->force_first_level = true;
}
so->base.texture = prsc;
so->base.reference.count = 1;
@@ -634,7 +637,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
so->texture_p0 =
(VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
- VC4_SET_FIELD(cso->u.tex.last_level -
+ VC4_SET_FIELD(so->force_first_level ?
+ cso->u.tex.last_level :
+ cso->u.tex.last_level -
cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
VC4_TEX_P0_CMMODE));