summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_draw.c 5
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_emit.h 1
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 6
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler.c 47
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_legalize.c 38
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.c 1
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.h 4
8 files changed, 99 insertions, 5 deletions
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index b6bf650cfe0..57f257478a7 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -100,6 +100,9 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
if (last_key->alpha != key->alpha)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+ if (last_key->rasterflat != key->rasterflat)
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
fd4_ctx->last_key = *key;
}
}
@@ -118,6 +121,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])),
+ .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@@ -130,7 +134,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_r = fd4_ctx->fsaturate_r,
},
.format = fd4_emit_format(pfb->cbufs[0]),
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
};
unsigned dirty;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index cdfa0f4c53a..5dc3db8880e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -55,7 +55,6 @@ struct fd4_emit {
struct ir3_shader_key key;
enum a4xx_color_fmt format;
uint32_t dirty;
- bool rasterflat;
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index a02b06f059a..fe0ffc98c97 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -113,6 +113,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
+ } else if (val.num == 63) {
+ /* ignore writes to dummy register r63.x */
} else if ((max != REG_A0) && (max != REG_P0)) {
if (reg->flags & IR3_REG_HALF) {
info->max_half_reg = MAX2(info->max_half_reg, max);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index f90392b7297..18d59fa7d55 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -427,6 +427,12 @@ static inline bool is_mem(struct ir3_instruction *instr)
static inline bool is_input(struct ir3_instruction *instr)
{
+ /* in some cases, ldlv is used to fetch varying without
+ * interpolation.. fortunately inloc is the first src
+ * register in either case
+ */
+ if (is_mem(instr) && (instr->opc == OPC_LDLV))
+ return true;
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 3ee9642efcc..2084ad31a56 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -105,6 +105,11 @@ struct ir3_compile_context {
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
+ /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
+ * so we need to use ldlv.u32 to load the varying directly:
+ */
+ bool flat_bypass;
+
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[8];
@@ -204,9 +209,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
} else if (ir3_shader_gpuid(so->shader) >= 400) {
/* a4xx seems to have *no* sam.p */
lconfig.lower_TXP = ~0; /* lower all txp */
+ /* need special handling for "flat" */
+ ctx->flat_bypass = true;
} else {
/* a3xx just needs to avoid sam.p for 3d tex */
lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
+ /* no special handling for "flat" */
+ ctx->flat_bypass = false;
}
ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
@@ -2745,11 +2754,23 @@ decl_semantic(const struct tgsi_declaration_semantic *sem)
static struct ir3_instruction *
decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
- unsigned j, unsigned inloc)
+ unsigned j, unsigned inloc, bool use_ldlv)
{
struct ir3_instruction *instr;
struct ir3_register *src;
+ if (use_ldlv) {
+ /* ldlv.u32 dst, l[#inloc], 1 */
+ instr = instr_create(ctx, 6, OPC_LDLV);
+ instr->cat6.type = TYPE_U32;
+ instr->cat6.iim_val = 1;
+ ir3_reg_create(instr, regid, 0); /* dummy dst */
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+
+ return instr;
+ }
+
/* bary.f dst, #inloc, r0.x */
instr = instr_create(ctx, 2, OPC_BARY_F);
ir3_reg_create(instr, regid, 0); /* dummy dst */
@@ -2943,9 +2964,31 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->frag_face = true;
instr = decl_in_frag_face(ctx, r + j, j);
} else {
+ bool use_ldlv = false;
+
+ /* I don't believe it is valid to not have Interp
+ * on a normal frag shader input, and various parts
+ * that handle flat/smooth shading make this
+ * assumption as well.
+ */
+ compile_assert(ctx, decl->Declaration.Interpolate);
+
+ if (ctx->flat_bypass) {
+ switch (decl->Interp.Interpolate) {
+ case TGSI_INTERPOLATE_COLOR:
+ if (!ctx->so->key.rasterflat)
+ break;
+ /* fallthrough */
+ case TGSI_INTERPOLATE_CONSTANT:
+ use_ldlv = true;
+ break;
+ }
+ }
+
so->inputs[n].bary = true;
+
instr = decl_in_frag_bary(ctx, r + j, j,
- so->inputs[n].inloc + j - 8);
+ so->inputs[n].inloc + j - 8, use_ldlv);
}
} else {
instr = create_input(ctx->block, NULL, (i * 4) + j);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index 11629f61f73..4e0b42b8d3a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -187,8 +187,44 @@ static void legalize(struct ir3_legalize_ctx *ctx)
last_input = n;
}
- if (last_input)
+ if (last_input) {
+ /* special hack.. if using ldlv to bypass interpolation,
+ * we need to insert a dummy bary.f on which we can set
+ * the (ei) flag:
+ */
+ if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
+ int i, cnt;
+
+ /* note that ir3_instr_create() inserts into
+ * shader->instrs[] and increments the count..
+ * so we need to bump up the cnt initially (to
+ * avoid it clobbering the last real instr) and
+ * restore it after.
+ */
+ cnt = ++shader->instrs_count;
+
+ /* inserting instructions would be a bit nicer if instrs[] were a list.. */
+ for (i = cnt - 2; i >= 0; i--) {
+ if (shader->instrs[i] == last_input) {
+
+ /* (ss)bary.f (ei)r63.x, 0, r0.x */
+ last_input = ir3_instr_create(block, 2, OPC_BARY_F);
+ last_input->flags |= IR3_INSTR_SS;
+ ir3_reg_create(last_input, regid(63, 0), 0);
+ ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
+ ir3_reg_create(last_input, regid(0, 0), 0);
+
+ shader->instrs[i + 1] = last_input;
+
+ break;
+ }
+ shader->instrs[i + 1] = shader->instrs[i];
+ }
+
+ shader->instrs_count = cnt;
+ }
last_input->regs[0]->flags |= IR3_REG_EI;
+ }
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 5e43e2866c9..7e7ae365bb5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -246,6 +246,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
key.color_two_side = false;
key.half_precision = false;
key.alpha = false;
+ key.rasterflat = false;
if (key.has_per_samp) {
key.fsaturate_s = 0;
key.fsaturate_t = 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index e5d57af1ea6..7f38067972b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -77,6 +77,10 @@ struct ir3_shader_key {
* let's start with this and see how it goes:
*/
unsigned alpha : 1;
+ /* used when shader needs to handle flat varyings (a4xx),
+ * for TGSI_INTERPOLATE_COLOR:
+ */
+ unsigned rasterflat : 1;
};
uint32_t global;
};