diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_legalize.c | 38 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 4 |
8 files changed, 99 insertions, 5 deletions
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index b6bf650cfe0..57f257478a7 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -100,6 +100,9 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->alpha != key->alpha) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + if (last_key->rasterflat != key->rasterflat) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + fd4_ctx->last_key = *key; } } @@ -118,6 +121,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .binning_pass = true, .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, .alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])), + .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), @@ -130,7 +134,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_r = fd4_ctx->fsaturate_r, }, .format = fd4_emit_format(pfb->cbufs[0]), - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index cdfa0f4c53a..5dc3db8880e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -55,7 +55,6 @@ struct fd4_emit { struct ir3_shader_key key; enum a4xx_color_fmt format; uint32_t dirty; - bool rasterflat; /* cached to avoid repeated lookups of same variants: */ struct ir3_shader_variant *vp, *fp; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index a02b06f059a..fe0ffc98c97 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -113,6 +113,8 @@ static 
uint32_t reg(struct ir3_register *reg, struct ir3_info *info, if (reg->flags & IR3_REG_CONST) { info->max_const = MAX2(info->max_const, max); + } else if (val.num == 63) { + /* ignore writes to dummy register r63.x */ } else if ((max != REG_A0) && (max != REG_P0)) { if (reg->flags & IR3_REG_HALF) { info->max_half_reg = MAX2(info->max_half_reg, max); diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f90392b7297..18d59fa7d55 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -427,6 +427,12 @@ static inline bool is_mem(struct ir3_instruction *instr) static inline bool is_input(struct ir3_instruction *instr) { + /* in some cases, ldlv is used to fetch varying without + * interpolation.. fortunately inloc is the first src + * register in either case + */ + if (is_mem(instr) && (instr->opc == OPC_LDLV)) + return true; return (instr->category == 2) && (instr->opc == OPC_BARY_F); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 3ee9642efcc..2084ad31a56 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -105,6 +105,11 @@ struct ir3_compile_context { /* for calculating input/output positions/linkages: */ unsigned next_inloc; + /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate + * so we need to use ldlv.u32 to load the varying directly: + */ + bool flat_bypass; + unsigned num_internal_temps; struct tgsi_src_register internal_temps[8]; @@ -204,9 +209,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, } else if (ir3_shader_gpuid(so->shader) >= 400) { /* a4xx seems to have *no* sam.p */ lconfig.lower_TXP = ~0; /* lower all txp */ + /* need special handling for "flat" */ + ctx->flat_bypass = true; } else { /* a3xx just needs to avoid sam.p for 3d tex */ lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D); 
+ /* no special handling for "flat" */ + ctx->flat_bypass = false; } ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info); @@ -2745,11 +2754,23 @@ decl_semantic(const struct tgsi_declaration_semantic *sem) static struct ir3_instruction * decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid, - unsigned j, unsigned inloc) + unsigned j, unsigned inloc, bool use_ldlv) { struct ir3_instruction *instr; struct ir3_register *src; + if (use_ldlv) { + /* ldlv.u32 dst, l[#inloc], 1 */ + instr = instr_create(ctx, 6, OPC_LDLV); + instr->cat6.type = TYPE_U32; + instr->cat6.iim_val = 1; + ir3_reg_create(instr, regid, 0); /* dummy dst */ + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; + + return instr; + } + /* bary.f dst, #inloc, r0.x */ instr = instr_create(ctx, 2, OPC_BARY_F); ir3_reg_create(instr, regid, 0); /* dummy dst */ @@ -2943,9 +2964,31 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) so->frag_face = true; instr = decl_in_frag_face(ctx, r + j, j); } else { + bool use_ldlv = false; + + /* I don't believe it is valid to not have Interp + * on a normal frag shader input, and various parts + * that handle flat/smooth shading make this + * assumption as well. 
+ */ + compile_assert(ctx, decl->Declaration.Interpolate); + + if (ctx->flat_bypass) { + switch (decl->Interp.Interpolate) { + case TGSI_INTERPOLATE_COLOR: + if (!ctx->so->key.rasterflat) + break; + /* fallthrough */ + case TGSI_INTERPOLATE_CONSTANT: + use_ldlv = true; + break; + } + } + so->inputs[n].bary = true; + instr = decl_in_frag_bary(ctx, r + j, j, - so->inputs[n].inloc + j - 8); + so->inputs[n].inloc + j - 8, use_ldlv); } } else { instr = create_input(ctx->block, NULL, (i * 4) + j); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 11629f61f73..4e0b42b8d3a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -187,8 +187,44 @@ static void legalize(struct ir3_legalize_ctx *ctx) last_input = n; } - if (last_input) + if (last_input) { + /* special hack.. if using ldlv to bypass interpolation, + * we need to insert a dummy bary.f on which we can set + * the (ei) flag: + */ + if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { + int i, cnt; + + /* note that ir3_instr_create() inserts into + * shader->instrs[] and increments the count.. + * so we need to bump up the cnt initially (to + * avoid it clobbering the last real instr) and + * restore it after. + */ + cnt = ++shader->instrs_count; + + /* inserting instructions would be a bit nicer if list.. 
*/ + for (i = cnt - 2; i >= 0; i--) { + if (shader->instrs[i] == last_input) { + + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + last_input = ir3_instr_create(block, 2, OPC_BARY_F); + last_input->flags |= IR3_INSTR_SS; + ir3_reg_create(last_input, regid(63, 0), 0); + ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(last_input, regid(0, 0), 0); + + shader->instrs[i + 1] = last_input; + + break; + } + shader->instrs[i + 1] = shader->instrs[i]; + } + + shader->instrs_count = cnt; + } last_input->regs[0]->flags |= IR3_REG_EI; + } if (last_rel) last_rel->flags |= IR3_INSTR_UL; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 5e43e2866c9..7e7ae365bb5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -246,6 +246,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) key.color_two_side = false; key.half_precision = false; key.alpha = false; + key.rasterflat = false; if (key.has_per_samp) { key.fsaturate_s = 0; key.fsaturate_t = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index e5d57af1ea6..7f38067972b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -77,6 +77,10 @@ struct ir3_shader_key { * let's start with this and see how it goes: */ unsigned alpha : 1; + /* used when shader needs to handle flat varyings (a4xx), + * for TGSI_INTERPOLATE_COLOR: + */ + unsigned rasterflat : 1; }; uint32_t global; }; |