diff options
-rw-r--r-- | .gitlab-ci/deqp-freedreno-a307-fails.txt | 8 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_ra.c | 41 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_ra.h | 2 |
3 files changed, 30 insertions, 21 deletions
diff --git a/.gitlab-ci/deqp-freedreno-a307-fails.txt b/.gitlab-ci/deqp-freedreno-a307-fails.txt index 6c835a85b60..7cf581dc204 100644 --- a/.gitlab-ci/deqp-freedreno-a307-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a307-fails.txt @@ -608,14 +608,6 @@ dEQP-GLES3.functional.texture.format.sized.3d.rgb10_a2ui_pot dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_npot dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_pot dEQP-GLES3.functional.texture.mipmap.cube.max_level.linear_nearest -dEQP-GLES3.functional.texture.shadow.2d.linear.less_or_equal_depth_component32f -dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.equal_depth_component32f -dEQP-GLES3.functional.texture.shadow.2d_array.linear.equal_depth_component32f -dEQP-GLES3.functional.texture.shadow.2d_array.nearest.less_depth_component32f -dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_linear.less_or_equal_depth_component32f -dEQP-GLES3.functional.texture.shadow.cube.linear_mipmap_linear.less_or_equal_depth_component32f -dEQP-GLES3.functional.texture.shadow.cube.nearest.less_or_equal_depth_component32f -dEQP-GLES3.functional.texture.shadow.cube.nearest_mipmap_nearest.equal_depth_component32f dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_2d dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_cube dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8ui_2d diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 927f91e98ec..231bfc6ffc1 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -497,19 +497,6 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data) return reg; } } - } else if (is_tex_or_prefetch(instr)) { - /* we could have a tex fetch w/ wrmask .z, for example.. these - * cannot land in r0.x since that would underflow when we - * subtract the offset. Ie. if we pick r0.z, and subtract - * the offset, the register encoded for dst will be r0.x - */ - unsigned n = ffs(instr->regs[0]->wrmask); - debug_assert(n > 0); - unsigned offset = n - 1; - if (!half) - offset *= 2; - base += offset; - max_target -= offset; } int r = pick_in_range(regs, base + start, base + max_target); @@ -571,6 +558,12 @@ ra_init(struct ir3_ra_ctx *ctx) } ctx->alloc_count += ctx->class_alloc_count[total_class_count]; + /* Add vreg names for r0.xyz */ + ctx->r0_xyz_nodes = ctx->alloc_count; + ctx->alloc_count += 3; + ctx->hr0_xyz_nodes = ctx->alloc_count; + ctx->alloc_count += 3; + ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); ralloc_steal(ctx->g, ctx->instrd); ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); @@ -710,6 +703,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) if ((instr->opc == OPC_META_INPUT) && first_non_input) use(name, first_non_input); + + /* Texture instructions with writemasks can be treated as smaller + * vectors (or just scalars!) to allocate knowing that the + * masked-out regs won't be written, but we need to make sure that + * the start of the vector doesn't come before the first register + * or we'll wrap. + */ + if (is_tex_or_prefetch(instr)) { + int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1; + int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ? + ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes; + for (int i = 0; i < writemask_skipped_regs; i++) + ra_add_node_interference(ctx->g, name, r0_xyz + i); + } } foreach_use (name, ctx, instr) { @@ -1005,6 +1012,14 @@ ra_add_interference(struct ir3_ra_ctx *ctx) arr->end_ip = 0; } + + /* set up the r0.xyz precolor regs. */ + for (int i = 0; i < 3; i++) { + ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i); + ra_set_node_reg(ctx->g, ctx->hr0_xyz_nodes + i, + ctx->set->first_half_reg + i); + } + /* compute live ranges (use/def) on a block level, also updating * block's def/use bitmasks (used below to calculate per-block * livein/liveout): diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index 45df2397ddc..35fb618c49a 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -144,6 +144,8 @@ struct ir3_ra_ctx { bool scalar_pass; unsigned alloc_count; + unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */ + unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */ /* one per class, plus one slot for arrays: */ unsigned class_alloc_count[total_class_count + 1]; unsigned class_base[total_class_count + 1]; |