diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 22 |
7 files changed, 41 insertions, 25 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 1d99e5caa99..911330cde2a 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -2054,12 +2054,6 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) } } -static void -decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - ctx->so->samplers_count++; -} - /* from TGSI perspective, we actually have inputs. But most of the "inputs" * for a fragment shader are just bary.f instructions. The *actual* inputs * from the hw perspective are the frag_pos and optionally frag_coord and @@ -2160,8 +2154,6 @@ compile_instructions(struct fd3_compile_context *ctx) decl_out(ctx, decl); } else if (decl->Declaration.File == TGSI_FILE_INPUT) { decl_in(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - decl_samp(ctx, decl); } break; } @@ -2320,7 +2312,7 @@ fd3_compile_shader(struct fd3_shader_variant *so, } ret = ir3_block_ra(block, so->type, key.half_precision, - so->frag_coord, so->frag_face); + so->frag_coord, so->frag_face, &so->has_samp); if (ret) goto out; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c index 76de287b163..ee58591fffc 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c @@ -1417,7 +1417,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) static void decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - ctx->so->samplers_count++; + ctx->so->has_samp = true; } static void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 00f1014444b..b1cf3fd131a 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -177,7 +177,7 @@ emit_textures(struct fd_ringbuffer *ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_samplers; i++) { static const struct fd3_sampler_stateobj dummy_sampler = {}; - struct fd3_sampler_stateobj *sampler = tex->samplers[i] ? + const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ? fd3_sampler_stateobj(tex->samplers[i]) : &dummy_sampler; OUT_RING(ring, sampler->texsamp0); @@ -542,11 +542,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) fd_wfi(ctx, ring); - if (dirty & FD_DIRTY_VERTTEX) - emit_textures(ring, SB_VERT_TEX, &ctx->verttex); + if (dirty & FD_DIRTY_VERTTEX) { + if (vp->has_samp) + emit_textures(ring, SB_VERT_TEX, &ctx->verttex); + else + dirty &= ~FD_DIRTY_VERTTEX; + } - if (dirty & FD_DIRTY_FRAGTEX) - emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex); + if (dirty & FD_DIRTY_FRAGTEX) { + if (fp->has_samp) + emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex); + else + dirty &= ~FD_DIRTY_FRAGTEX; + } ctx->dirty &= ~dirty; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 09cadf81cbf..b5544e8c358 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -120,7 +120,7 @@ create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) v->inputs_count = 0; v->outputs_count = 0; v->total_in = 0; - v->samplers_count = 0; + v->has_samp = false; v->immediates_count = 0; } } else { @@ -397,7 +397,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | - COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | + COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen)); OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | @@ -475,7 +475,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | - COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | + COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen)); OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 8d4fd57ae75..e0866c1d008 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -107,8 +107,8 @@ struct fd3_shader_variant { unsigned total_in; /* sum of inputs (scalar) */ - /* samplers: */ - unsigned samplers_count; + /* do we have one or more texture sample instructions: */ + bool has_samp; /* const reg # of first immediate, ie. 1 == c1 * (not regid, because TGSI thinks in terms of vec4 registers, diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h index 09052346992..872f47883bb 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3.h +++ b/src/gallium/drivers/freedreno/a3xx/ir3.h @@ -385,8 +385,8 @@ void ir3_block_sched(struct ir3_block *block); /* register assignment: */ int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face); - + bool half_precision, bool frag_coord, bool frag_face, + bool *has_samp); #ifndef ARRAY_SIZE # define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index 4e48eded2bb..57c68c729c5 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -56,6 +56,7 @@ struct ir3_ra_ctx { bool half_precision; bool frag_coord; bool frag_face; + bool has_samp; int cnt; bool error; }; @@ -654,8 +655,17 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) + if (is_tex(n)) { + /* this ends up being the # of samp instructions.. but that + * is ok, everything else only cares whether it is zero or + * not. We do this here, rather than when we encounter a + * SAMP decl, because (especially in binning pass shader) + * the samp instruction(s) could get eliminated if the + * result is not used. + */ + ctx->has_samp = true; regmask_set(&needs_sy, n->regs[0]); + } /* both tex/sfu appear to not always immediately consume * their src register(s): @@ -730,7 +740,8 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) } int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face) + bool half_precision, bool frag_coord, bool frag_face, + bool *has_samp) { struct ir3_ra_ctx ctx = { .block = block, @@ -739,6 +750,11 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, .frag_coord = frag_coord, .frag_face = frag_face, }; + int ret; + ir3_shader_clear_mark(block->shader); - return block_ra(&ctx, block); + ret = block_ra(&ctx, block); + *has_samp = ctx.has_samp; + + return ret; } |