From e9f2abe349886ae5423c7c31d201e7d587a3695a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 25 Feb 2015 13:54:25 -0500 Subject: freedreno/ir3: handle flat bypass for a4xx We may not need this for later a4xx patchlevels, but we do at least need this for patchlevel 0. Bypass bary.f for fetching varyings when flat shading is needed (rather than configure via cmdstream). This requires a special dummy bary.f w/ (ei) flag to signal to scheduler when all varyings are consumed. And requires shader variants based on rasterizer flatshade state to handle TGSI_INTERPOLATE_COLOR. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 5 ++- src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 1 - src/gallium/drivers/freedreno/ir3/ir3.c | 2 + src/gallium/drivers/freedreno/ir3/ir3.h | 6 +++ src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 47 +++++++++++++++++++++++- src/gallium/drivers/freedreno/ir3/ir3_legalize.c | 38 ++++++++++++++++++- src/gallium/drivers/freedreno/ir3/ir3_shader.c | 1 + src/gallium/drivers/freedreno/ir3/ir3_shader.h | 4 ++ 8 files changed, 99 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/freedreno') diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index b6bf650cfe0..57f257478a7 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -100,6 +100,9 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->alpha != key->alpha) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + if (last_key->rasterflat != key->rasterflat) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + fd4_ctx->last_key = *key; } } @@ -118,6 +121,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .binning_pass = true, .color_two_side = ctx->rasterizer ? 
ctx->rasterizer->light_twoside : false, .alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])), + .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), @@ -130,7 +134,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_r = fd4_ctx->fsaturate_r, }, .format = fd4_emit_format(pfb->cbufs[0]), - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index cdfa0f4c53a..5dc3db8880e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -55,7 +55,6 @@ struct fd4_emit { struct ir3_shader_key key; enum a4xx_color_fmt format; uint32_t dirty; - bool rasterflat; /* cached to avoid repeated lookups of same variants: */ struct ir3_shader_variant *vp, *fp; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index a02b06f059a..fe0ffc98c97 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -113,6 +113,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, if (reg->flags & IR3_REG_CONST) { info->max_const = MAX2(info->max_const, max); + } else if (val.num == 63) { + /* ignore writes to dummy register r63.x */ } else if ((max != REG_A0) && (max != REG_P0)) { if (reg->flags & IR3_REG_HALF) { info->max_half_reg = MAX2(info->max_half_reg, max); diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f90392b7297..18d59fa7d55 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -427,6 +427,12 @@ static inline bool is_mem(struct ir3_instruction *instr) static inline bool 
is_input(struct ir3_instruction *instr) { + /* in some cases, ldlv is used to fetch varying without + * interpolation.. fortunately inloc is the first src + * register in either case + */ + if (is_mem(instr) && (instr->opc == OPC_LDLV)) + return true; return (instr->category == 2) && (instr->opc == OPC_BARY_F); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 3ee9642efcc..2084ad31a56 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -105,6 +105,11 @@ struct ir3_compile_context { /* for calculating input/output positions/linkages: */ unsigned next_inloc; + /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate + * so we need to use ldlv.u32 to load the varying directly: + */ + bool flat_bypass; + unsigned num_internal_temps; struct tgsi_src_register internal_temps[8]; @@ -204,9 +209,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, } else if (ir3_shader_gpuid(so->shader) >= 400) { /* a4xx seems to have *no* sam.p */ lconfig.lower_TXP = ~0; /* lower all txp */ + /* need special handling for "flat" */ + ctx->flat_bypass = true; } else { /* a3xx just needs to avoid sam.p for 3d tex */ lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D); + /* no special handling for "flat" */ + ctx->flat_bypass = false; } ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info); @@ -2745,11 +2754,23 @@ decl_semantic(const struct tgsi_declaration_semantic *sem) static struct ir3_instruction * decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid, - unsigned j, unsigned inloc) + unsigned j, unsigned inloc, bool use_ldlv) { struct ir3_instruction *instr; struct ir3_register *src; + if (use_ldlv) { + /* ldlv.u32 dst, l[#inloc], 1 */ + instr = instr_create(ctx, 6, OPC_LDLV); + instr->cat6.type = TYPE_U32; + instr->cat6.iim_val = 1; + ir3_reg_create(instr, regid, 0); /* dummy dst */ + 
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; + + return instr; + } + /* bary.f dst, #inloc, r0.x */ instr = instr_create(ctx, 2, OPC_BARY_F); ir3_reg_create(instr, regid, 0); /* dummy dst */ @@ -2943,9 +2964,31 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) so->frag_face = true; instr = decl_in_frag_face(ctx, r + j, j); } else { + bool use_ldlv = false; + + /* I don't believe it is valid to not have Interp + * on a normal frag shader input, and various parts + * that handle flat/smooth shading make this + * assumption as well. + */ + compile_assert(ctx, decl->Declaration.Interpolate); + + if (ctx->flat_bypass) { + switch (decl->Interp.Interpolate) { + case TGSI_INTERPOLATE_COLOR: + if (!ctx->so->key.rasterflat) + break; + /* fallthrough */ + case TGSI_INTERPOLATE_CONSTANT: + use_ldlv = true; + break; + } + } + so->inputs[n].bary = true; + instr = decl_in_frag_bary(ctx, r + j, j, - so->inputs[n].inloc + j - 8); + so->inputs[n].inloc + j - 8, use_ldlv); } } else { instr = create_input(ctx->block, NULL, (i * 4) + j); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 11629f61f73..4e0b42b8d3a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -187,8 +187,44 @@ static void legalize(struct ir3_legalize_ctx *ctx) last_input = n; } - if (last_input) + if (last_input) { + /* special hack.. if using ldlv to bypass interpolation, + * we need to insert a dummy bary.f on which we can set + * the (ei) flag: + */ + if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { + int i, cnt; + + /* note that ir3_instr_create() inserts into + * shader->instrs[] and increments the count.. + * so we need to bump up the cnt initially (to + * avoid it clobbering the last real instr) and + * restore it after.
+ */ + cnt = ++shader->instrs_count; + + /* inserting instructions would be a bit nicer if list.. */ + for (i = cnt - 2; i >= 0; i--) { + if (shader->instrs[i] == last_input) { + + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + last_input = ir3_instr_create(block, 2, OPC_BARY_F); + last_input->flags |= IR3_INSTR_SS; + ir3_reg_create(last_input, regid(63, 0), 0); + ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(last_input, regid(0, 0), 0); + + shader->instrs[i + 1] = last_input; + + break; + } + shader->instrs[i + 1] = shader->instrs[i]; + } + + shader->instrs_count = cnt; + } last_input->regs[0]->flags |= IR3_REG_EI; + } if (last_rel) last_rel->flags |= IR3_INSTR_UL; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 5e43e2866c9..7e7ae365bb5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -246,6 +246,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) key.color_two_side = false; key.half_precision = false; key.alpha = false; + key.rasterflat = false; if (key.has_per_samp) { key.fsaturate_s = 0; key.fsaturate_t = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index e5d57af1ea6..7f38067972b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -77,6 +77,10 @@ struct ir3_shader_key { * let's start with this and see how it goes: */ unsigned alpha : 1; + /* used when shader needs to handle flat varyings (a4xx), + * for TGSI_INTERPOLATE_COLOR: + */ + unsigned rasterflat : 1; }; uint32_t global; }; -- cgit v1.2.3