diff options
author | Rob Clark <[email protected]> | 2018-10-12 16:01:22 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-10-17 12:44:48 -0400 |
commit | d9dbc9c21f17e4c86f8e366fbe225df39e3b7b59 (patch) | |
tree | 8aac369382a6b6bdcaf229526f896ae0dd243cad | |
parent | 1a51c4a87ea9202af90ccb28bd697f0df753f587 (diff) |
freedreno/ir3: move binning-pass fixup for a6xx+
Move this to after ir3_cp (which can add lowered immediates to the const
state) for a6xx+, to ensure the uniform state matches between binning
and vertex shaders. This way we can emit just a single VS_CONST state
group when we re-use a single cmdstream for both binning and draw passes.
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 57 |
1 files changed, 37 insertions, 20 deletions
```diff
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 9474f75a9f7..197196383b0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -3562,6 +3562,32 @@ fixup_astc_srgb(struct ir3_context *ctx)
 	}
 }
 
+static void
+fixup_binning_pass(struct ir3_context *ctx)
+{
+	struct ir3_shader_variant *so = ctx->so;
+	struct ir3 *ir = ctx->ir;
+	unsigned i, j;
+
+	for (i = 0, j = 0; i < so->outputs_count; i++) {
+		unsigned slot = so->outputs[i].slot;
+
+		/* throw away everything but first position/psize */
+		if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
+			if (i != j) {
+				so->outputs[j] = so->outputs[i];
+				ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
+				ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
+				ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
+				ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
+			}
+			j++;
+		}
+	}
+	so->outputs_count = j;
+	ir->noutputs = j * 4;
+}
+
 int
 ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		struct ir3_shader_variant *so)
@@ -3569,7 +3595,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	struct ir3_context *ctx;
 	struct ir3 *ir;
 	struct ir3_instruction **inputs;
-	unsigned i, j, actual_in, inloc;
+	unsigned i, actual_in, inloc;
 	int ret = 0, max_bary;
 
 	assert(!so->ir);
@@ -3599,25 +3625,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		fixup_frag_inputs(ctx);
 
 	/* at this point, for binning pass, throw away unneeded outputs: */
-	if (so->binning_pass) {
-		for (i = 0, j = 0; i < so->outputs_count; i++) {
-			unsigned slot = so->outputs[i].slot;
-
-			/* throw away everything but first position/psize */
-			if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
-				if (i != j) {
-					so->outputs[j] = so->outputs[i];
-					ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
-					ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
-					ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
-					ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
-				}
-				j++;
-			}
-		}
-		so->outputs_count = j;
-		ir->noutputs = j * 4;
-	}
+	if (so->binning_pass && (ctx->compiler->gpu_id < 600))
+		fixup_binning_pass(ctx);
 
 	/* if we want half-precision outputs, mark the output registers
 	 * as half:
@@ -3656,6 +3665,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 
 	ir3_cp(ir, so);
 
+	/* at this point, for binning pass, throw away unneeded outputs:
+	 * Note that for a6xx and later, we do this after ir3_cp to ensure
+	 * that the uniform/constant layout for BS and VS matches, so that
+	 * we can re-use same VS_CONST state group.
+	 */
+	if (so->binning_pass && (ctx->compiler->gpu_id >= 600))
+		fixup_binning_pass(ctx);
+
 	/* Insert mov if there's same instruction for each output.
 	 * eg. dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
 	 */
```