summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c 10
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c 2
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c 18
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 6
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.h 4
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3.h 4
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3_ra.c 22
7 files changed, 41 insertions, 25 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 1d99e5caa99..911330cde2a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -2054,12 +2054,6 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
}
}
-static void
-decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
- ctx->so->samplers_count++;
-}
-
/* from TGSI perspective, we actually have inputs. But most of the "inputs"
* for a fragment shader are just bary.f instructions. The *actual* inputs
* from the hw perspective are the frag_pos and optionally frag_coord and
@@ -2160,8 +2154,6 @@ compile_instructions(struct fd3_compile_context *ctx)
decl_out(ctx, decl);
} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
decl_in(ctx, decl);
- } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
- decl_samp(ctx, decl);
}
break;
}
@@ -2320,7 +2312,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
}
ret = ir3_block_ra(block, so->type, key.half_precision,
- so->frag_coord, so->frag_face);
+ so->frag_coord, so->frag_face, &so->has_samp);
if (ret)
goto out;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
index 76de287b163..ee58591fffc 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
@@ -1417,7 +1417,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
static void
decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- ctx->so->samplers_count++;
+ ctx->so->has_samp = true;
}
static void
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 00f1014444b..b1cf3fd131a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -177,7 +177,7 @@ emit_textures(struct fd_ringbuffer *ring,
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_samplers; i++) {
static const struct fd3_sampler_stateobj dummy_sampler = {};
- struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
+ const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
fd3_sampler_stateobj(tex->samplers[i]) :
&dummy_sampler;
OUT_RING(ring, sampler->texsamp0);
@@ -542,11 +542,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
fd_wfi(ctx, ring);
- if (dirty & FD_DIRTY_VERTTEX)
- emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+ if (dirty & FD_DIRTY_VERTTEX) {
+ if (vp->has_samp)
+ emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+ else
+ dirty &= ~FD_DIRTY_VERTTEX;
+ }
- if (dirty & FD_DIRTY_FRAGTEX)
- emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+ if (dirty & FD_DIRTY_FRAGTEX) {
+ if (fp->has_samp)
+ emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+ else
+ dirty &= ~FD_DIRTY_FRAGTEX;
+ }
ctx->dirty &= ~dirty;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 09cadf81cbf..b5544e8c358 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -120,7 +120,7 @@ create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
v->inputs_count = 0;
v->outputs_count = 0;
v->total_in = 0;
- v->samplers_count = 0;
+ v->has_samp = false;
v->immediates_count = 0;
}
} else {
@@ -397,7 +397,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
- COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
+ COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
@@ -475,7 +475,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
- COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+ COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 8d4fd57ae75..e0866c1d008 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -107,8 +107,8 @@ struct fd3_shader_variant {
unsigned total_in; /* sum of inputs (scalar) */
- /* samplers: */
- unsigned samplers_count;
+ /* do we have one or more texture sample instructions: */
+ bool has_samp;
/* const reg # of first immediate, ie. 1 == c1
* (not regid, because TGSI thinks in terms of vec4 registers,
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index 09052346992..872f47883bb 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -385,8 +385,8 @@ void ir3_block_sched(struct ir3_block *block);
/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
- bool half_precision, bool frag_coord, bool frag_face);
-
+ bool half_precision, bool frag_coord, bool frag_face,
+ bool *has_samp);
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index 4e48eded2bb..57c68c729c5 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -56,6 +56,7 @@ struct ir3_ra_ctx {
bool half_precision;
bool frag_coord;
bool frag_face;
+ bool has_samp;
int cnt;
bool error;
};
@@ -654,8 +655,17 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
- if (is_tex(n))
+ if (is_tex(n)) {
+ /* record that the shader has at least one texture sample
+ * instruction; everything else only cares whether there are
+ * any or not. We do this here, rather than when we encounter a
+ * SAMP decl, because (especially in binning pass shader)
+ * the samp instruction(s) could get eliminated if the
+ * result is not used.
+ */
+ ctx->has_samp = true;
regmask_set(&needs_sy, n->regs[0]);
+ }
/* both tex/sfu appear to not always immediately consume
* their src register(s):
@@ -730,7 +740,8 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
- bool half_precision, bool frag_coord, bool frag_face)
+ bool half_precision, bool frag_coord, bool frag_face,
+ bool *has_samp)
{
struct ir3_ra_ctx ctx = {
.block = block,
@@ -739,6 +750,11 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
.frag_coord = frag_coord,
.frag_face = frag_face,
};
+ int ret;
+
ir3_shader_clear_mark(block->shader);
- return block_ra(&ctx, block);
+ ret = block_ra(&ctx, block);
+ *has_samp = ctx.has_samp;
+
+ return ret;
}