about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- .gitlab-ci/deqp-freedreno-a307-fails.txt 8
-rw-r--r-- src/freedreno/ir3/ir3_ra.c 41
-rw-r--r-- src/freedreno/ir3/ir3_ra.h 2
3 files changed, 30 insertions, 21 deletions
diff --git a/.gitlab-ci/deqp-freedreno-a307-fails.txt b/.gitlab-ci/deqp-freedreno-a307-fails.txt
index 6c835a85b60..7cf581dc204 100644
--- a/.gitlab-ci/deqp-freedreno-a307-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a307-fails.txt
@@ -608,14 +608,6 @@ dEQP-GLES3.functional.texture.format.sized.3d.rgb10_a2ui_pot
dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_npot
dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_pot
dEQP-GLES3.functional.texture.mipmap.cube.max_level.linear_nearest
-dEQP-GLES3.functional.texture.shadow.2d.linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.linear.equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.nearest.less_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.linear_mipmap_linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.nearest.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.nearest_mipmap_nearest.equal_depth_component32f
dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_2d
dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_cube
dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8ui_2d
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 927f91e98ec..231bfc6ffc1 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -497,19 +497,6 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data)
return reg;
}
}
- } else if (is_tex_or_prefetch(instr)) {
- /* we could have a tex fetch w/ wrmask .z, for example.. these
- * cannot land in r0.x since that would underflow when we
- * subtract the offset. Ie. if we pick r0.z, and subtract
- * the offset, the register encoded for dst will be r0.x
- */
- unsigned n = ffs(instr->regs[0]->wrmask);
- debug_assert(n > 0);
- unsigned offset = n - 1;
- if (!half)
- offset *= 2;
- base += offset;
- max_target -= offset;
}
int r = pick_in_range(regs, base + start, base + max_target);
@@ -571,6 +558,12 @@ ra_init(struct ir3_ra_ctx *ctx)
}
ctx->alloc_count += ctx->class_alloc_count[total_class_count];
+ /* Add vreg names for r0.xyz and hr0.xyz */
+ ctx->r0_xyz_nodes = ctx->alloc_count;
+ ctx->alloc_count += 3;
+ ctx->hr0_xyz_nodes = ctx->alloc_count;
+ ctx->alloc_count += 3;
+
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -710,6 +703,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if ((instr->opc == OPC_META_INPUT) && first_non_input)
use(name, first_non_input);
+
+ /* Texture instructions with writemasks can be treated as smaller
+ * vectors (or just scalars!) to allocate knowing that the
+ * masked-out regs won't be written, but we need to make sure that
+ * the start of the vector doesn't come before the first register
+ * or we'll wrap.
+ */
+ if (is_tex_or_prefetch(instr)) {
+ int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
+ int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+ ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
+ for (int i = 0; i < writemask_skipped_regs; i++)
+ ra_add_node_interference(ctx->g, name, r0_xyz + i);
+ }
}
foreach_use (name, ctx, instr) {
@@ -1005,6 +1012,14 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
arr->end_ip = 0;
}
+
+ /* Set up the r0.xyz and hr0.xyz precolor regs. */
+ for (int i = 0; i < 3; i++) {
+ ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
+ ra_set_node_reg(ctx->g, ctx->hr0_xyz_nodes + i,
+ ctx->set->first_half_reg + i);
+ }
+
/* compute live ranges (use/def) on a block level, also updating
* block's def/use bitmasks (used below to calculate per-block
* livein/liveout):
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h
index 45df2397ddc..35fb618c49a 100644
--- a/src/freedreno/ir3/ir3_ra.h
+++ b/src/freedreno/ir3/ir3_ra.h
@@ -144,6 +144,8 @@ struct ir3_ra_ctx {
bool scalar_pass;
unsigned alloc_count;
+ unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
+ unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
/* one per class, plus one slot for arrays: */
unsigned class_alloc_count[total_class_count + 1];
unsigned class_base[total_class_count + 1];