diff options
-rw-r--r-- | src/panfrost/Makefile.sources | 1 | ||||
-rw-r--r-- | src/panfrost/midgard/compiler.h | 47 | ||||
-rw-r--r-- | src/panfrost/midgard/meson.build | 1 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_compile.c | 1 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_ra.c | 252 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_schedule.c | 336 | ||||
-rw-r--r-- | src/panfrost/midgard/mir_squeeze.c | 81 |
7 files changed, 380 insertions, 339 deletions
diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources index f9bbfc0452b..8a9bfc308a7 100644 --- a/src/panfrost/Makefile.sources +++ b/src/panfrost/Makefile.sources @@ -46,6 +46,7 @@ midgard_FILES := \ midgard/midgard_errata_lod.c \ midgard/mir.c \ midgard/mir_promote_uniforms.c \ + midgard/mir_squeeze.c \ midgard/lcra.c shared_FILES := \ diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 518de4c7df4..eb0464e6a49 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -565,6 +565,49 @@ v_mov(unsigned src, unsigned dest) return ins; } +/* Like a move, but to thread local storage! */ + +static inline midgard_instruction +v_load_store_scratch( + unsigned srcdest, + unsigned index, + bool is_store, + unsigned mask) +{ + /* We index by 32-bit vec4s */ + unsigned byte = (index * 4 * 4); + + midgard_instruction ins = { + .type = TAG_LOAD_STORE_4, + .mask = mask, + .dest = ~0, + .src = { ~0, ~0, ~0 }, + .swizzle = SWIZZLE_IDENTITY_4, + .load_store = { + .op = is_store ? 
midgard_op_st_int4 : midgard_op_ld_int4, + + /* For register spilling - to thread local storage */ + .arg_1 = 0xEA, + .arg_2 = 0x1E, + }, + + /* If we spill an unspill, RA goes into an infinite loop */ + .no_spill = true + }; + + ins.constants[0] = byte; + + if (is_store) { + /* r0 = r26, r1 = r27 */ + assert(srcdest == SSA_FIXED_REGISTER(26) || srcdest == SSA_FIXED_REGISTER(27)); + ins.src[0] = srcdest; + } else { + ins.dest = srcdest; + } + + return ins; +} + static inline bool mir_has_arg(midgard_instruction *ins, unsigned arg) { @@ -591,9 +634,9 @@ void schedule_program(compiler_context *ctx); #define REG_CLASS_TEXR 3 #define REG_CLASS_TEXW 4 +void mir_ra(compiler_context *ctx); +void mir_squeeze_index(compiler_context *ctx); void mir_lower_special_reads(compiler_context *ctx); -struct lcra_state* allocate_registers(compiler_context *ctx, bool *spilled); -void install_registers(compiler_context *ctx, struct lcra_state *g); void mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max); void mir_compute_liveness(compiler_context *ctx); void mir_invalidate_liveness(compiler_context *ctx); diff --git a/src/panfrost/midgard/meson.build b/src/panfrost/midgard/meson.build index fcb3089c6e2..e60b338546b 100644 --- a/src/panfrost/midgard/meson.build +++ b/src/panfrost/midgard/meson.build @@ -31,6 +31,7 @@ libpanfrost_midgard_files = files( 'midgard_liveness.c', 'midgard_ops.c', 'mir_promote_uniforms.c', + 'mir_squeeze.c', 'midgard_opt_copy_prop.c', 'midgard_opt_dce.c', 'midgard_opt_invert.c', diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 76f53fbabfc..6575771edf7 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -2586,6 +2586,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl /* Schedule! 
*/ schedule_program(ctx); + mir_ra(ctx); /* Now that all the bundles are scheduled and we can calculate block * sizes, emit actual branch instructions rather than placeholders */ diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 1005a8ba805..792654026aa 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -415,7 +415,7 @@ mir_compute_interference( /* This routine performs the actual register allocation. It should be succeeded * by install_registers */ -struct lcra_state * +static struct lcra_state * allocate_registers(compiler_context *ctx, bool *spilled) { /* The number of vec4 work registers available depends on when the @@ -670,9 +670,257 @@ install_registers_instr( } } -void +static void install_registers(compiler_context *ctx, struct lcra_state *l) { mir_foreach_instr_global(ctx, ins) install_registers_instr(ctx, l, ins); } + + +/* If register allocation fails, find the best spill node and spill it to fix + * whatever the issue was. This spill node could be a work register (spilling + * to thread local storage), but it could also simply be a special register + * that needs to spill to become a work register. */ + +static void mir_spill_register( + compiler_context *ctx, + struct lcra_state *l, + unsigned *spill_count) +{ + unsigned spill_index = ctx->temp_count; + + /* Our first step is to calculate spill cost to figure out the best + * spill node. All nodes are equal in spill cost, but we can't spill + * nodes written to from an unspill */ + + unsigned *cost = calloc(ctx->temp_count, sizeof(cost[0])); + + mir_foreach_instr_global(ctx, ins) { + if (ins->dest < ctx->temp_count) + cost[ins->dest]++; + + mir_foreach_src(ins, s) { + if (ins->src[s] < ctx->temp_count) + cost[ins->src[s]]++; + } + } + + for (unsigned i = 0; i < ctx->temp_count; ++i) + lcra_set_node_spill_cost(l, i, cost[i]); + + /* We can't spill any bundles that contain unspills. 
This could be + * optimized to allow use of r27 to spill twice per bundle, but if + * you're at the point of optimizing spilling, it's too late. + * + * We also can't double-spill. */ + + mir_foreach_block(ctx, block) { + mir_foreach_bundle_in_block(block, bun) { + bool no_spill = false; + + for (unsigned i = 0; i < bun->instruction_count; ++i) { + no_spill |= bun->instructions[i]->no_spill; + + if (bun->instructions[i]->no_spill) { + mir_foreach_src(bun->instructions[i], s) { + unsigned src = bun->instructions[i]->src[s]; + + if (src < ctx->temp_count) + lcra_set_node_spill_cost(l, src, -1); + } + } + } + + if (!no_spill) + continue; + + for (unsigned i = 0; i < bun->instruction_count; ++i) { + unsigned dest = bun->instructions[i]->dest; + if (dest < ctx->temp_count) + lcra_set_node_spill_cost(l, dest, -1); + } + } + } + + int spill_node = lcra_get_best_spill_node(l); + + if (spill_node < 0) { + mir_print_shader(ctx); + assert(0); + } + + /* We have a spill node, so check the class. Work registers + * legitimately spill to TLS, but special registers just spill to work + * registers */ + + bool is_special = l->class[spill_node] != REG_CLASS_WORK; + bool is_special_w = l->class[spill_node] == REG_CLASS_TEXW; + + /* Allocate TLS slot (maybe) */ + unsigned spill_slot = !is_special ? (*spill_count)++ : 0; + + /* For TLS, replace all stores to the spilled node. For + * special reads, just keep as-is; the class will be demoted + * implicitly. 
For special writes, spill to a work register */ + + if (!is_special || is_special_w) { + if (is_special_w) + spill_slot = spill_index++; + + mir_foreach_block(ctx, block) { + mir_foreach_instr_in_block_safe(block, ins) { + if (ins->dest != spill_node) continue; + + midgard_instruction st; + + if (is_special_w) { + st = v_mov(spill_node, spill_slot); + st.no_spill = true; + } else { + ins->dest = SSA_FIXED_REGISTER(26); + ins->no_spill = true; + st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask); + } + + /* Hint: don't rewrite this node */ + st.hint = true; + + mir_insert_instruction_after_scheduled(ctx, block, ins, st); + + if (!is_special) + ctx->spills++; + } + } + } + + /* For special reads, figure out how many bytes we need */ + unsigned read_bytemask = 0; + + mir_foreach_instr_global_safe(ctx, ins) { + read_bytemask |= mir_bytemask_of_read_components(ins, spill_node); + } + + /* Insert a load from TLS before the first consecutive + * use of the node, rewriting to use spilled indices to + * break up the live range. Or, for special, insert a + * move. Ironically the latter *increases* register + * pressure, but the two uses of the spilling mechanism + * are somewhat orthogonal. (special spilling is to use + * work registers to back special registers; TLS + * spilling is to use memory to back work registers) */ + + mir_foreach_block(ctx, block) { + bool consecutive_skip = false; + unsigned consecutive_index = 0; + + mir_foreach_instr_in_block(block, ins) { + /* We can't rewrite the moves used to spill in the + * first place. These moves are hinted. 
*/ + if (ins->hint) continue; + + if (!mir_has_arg(ins, spill_node)) { + consecutive_skip = false; + continue; + } + + if (consecutive_skip) { + /* Rewrite */ + mir_rewrite_index_src_single(ins, spill_node, consecutive_index); + continue; + } + + if (!is_special_w) { + consecutive_index = ++spill_index; + + midgard_instruction *before = ins; + + /* TODO: Remove me I'm a fossil */ + if (ins->type == TAG_ALU_4 && OP_IS_CSEL(ins->alu.op)) + before = mir_prev_op(before); + + midgard_instruction st; + + if (is_special) { + /* Move */ + st = v_mov(spill_node, consecutive_index); + st.no_spill = true; + } else { + /* TLS load */ + st = v_load_store_scratch(consecutive_index, spill_slot, false, 0xF); + } + + /* Mask the load based on the component count + * actually needed to prevent RA loops */ + + st.mask = mir_from_bytemask(read_bytemask, midgard_reg_mode_32); + + mir_insert_instruction_before_scheduled(ctx, block, before, st); + // consecutive_skip = true; + } else { + /* Special writes already have their move spilled in */ + consecutive_index = spill_slot; + } + + + /* Rewrite to use */ + mir_rewrite_index_src_single(ins, spill_node, consecutive_index); + + if (!is_special) + ctx->fills++; + } + } + + /* Reset hints */ + + mir_foreach_instr_global(ctx, ins) { + ins->hint = false; + } + + free(cost); +} + +/* Run register allocation in a loop, spilling until we succeed */ + +void +mir_ra(compiler_context *ctx) +{ + struct lcra_state *l = NULL; + bool spilled = false; + int iter_count = 1000; /* max iterations */ + + /* Number of 128-bit slots in memory we've spilled into */ + unsigned spill_count = 0; + + + mir_create_pipeline_registers(ctx); + + do { + if (spilled) + mir_spill_register(ctx, l, &spill_count); + + mir_squeeze_index(ctx); + mir_invalidate_liveness(ctx); + + if (l) { + lcra_free(l); + l = NULL; + } + + l = allocate_registers(ctx, &spilled); + } while(spilled && ((iter_count--) > 0)); + + if (iter_count <= 0) { + fprintf(stderr, "panfrost: Gave up 
allocating registers, rendering will be incomplete\n"); + assert(0); + } + + /* Report spilling information. spill_count is in 128-bit slots (vec4 x + * fp32), but tls_size is in bytes, so multiply by 16 */ + + ctx->tls_size = spill_count * 16; + + install_registers(ctx, l); + + lcra_free(l); +} diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index d05bdf79d47..f2e9a3c8e9c 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -1099,316 +1099,12 @@ schedule_block(compiler_context *ctx, midgard_block *block) free(worklist); } -/* When we're 'squeezing down' the values in the IR, we maintain a hash - * as such */ - -static unsigned -find_or_allocate_temp(compiler_context *ctx, unsigned hash) -{ - if (hash >= SSA_FIXED_MINIMUM) - return hash; - - unsigned temp = (uintptr_t) _mesa_hash_table_u64_search( - ctx->hash_to_temp, hash + 1); - - if (temp) - return temp - 1; - - /* If no temp is find, allocate one */ - temp = ctx->temp_count++; - ctx->max_hash = MAX2(ctx->max_hash, hash); - - _mesa_hash_table_u64_insert(ctx->hash_to_temp, - hash + 1, (void *) ((uintptr_t) temp + 1)); - - return temp; -} - -/* Reassigns numbering to get rid of gaps in the indices and to prioritize - * smaller register classes */ - -static void -mir_squeeze_index(compiler_context *ctx) -{ - /* Reset */ - ctx->temp_count = 0; - /* TODO don't leak old hash_to_temp */ - ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL); - - /* We need to prioritize texture registers on older GPUs so we don't - * fail RA trying to assign to work registers r0/r1 when a work - * register is already there */ - - mir_foreach_instr_global(ctx, ins) { - if (ins->type == TAG_TEXTURE_4) - ins->dest = find_or_allocate_temp(ctx, ins->dest); - } - - mir_foreach_instr_global(ctx, ins) { - if (ins->type != TAG_TEXTURE_4) - ins->dest = find_or_allocate_temp(ctx, ins->dest); - - for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) - 
ins->src[i] = find_or_allocate_temp(ctx, ins->src[i]); - } -} - -static midgard_instruction -v_load_store_scratch( - unsigned srcdest, - unsigned index, - bool is_store, - unsigned mask) -{ - /* We index by 32-bit vec4s */ - unsigned byte = (index * 4 * 4); - - midgard_instruction ins = { - .type = TAG_LOAD_STORE_4, - .mask = mask, - .dest = ~0, - .src = { ~0, ~0, ~0 }, - .swizzle = SWIZZLE_IDENTITY_4, - .load_store = { - .op = is_store ? midgard_op_st_int4 : midgard_op_ld_int4, - - /* For register spilling - to thread local storage */ - .arg_1 = 0xEA, - .arg_2 = 0x1E, - }, - - /* If we spill an unspill, RA goes into an infinite loop */ - .no_spill = true - }; - - ins.constants[0] = byte; - - if (is_store) { - /* r0 = r26, r1 = r27 */ - assert(srcdest == SSA_FIXED_REGISTER(26) || srcdest == SSA_FIXED_REGISTER(27)); - ins.src[0] = srcdest; - } else { - ins.dest = srcdest; - } - - return ins; -} - -/* If register allocation fails, find the best spill node and spill it to fix - * whatever the issue was. This spill node could be a work register (spilling - * to thread local storage), but it could also simply be a special register - * that needs to spill to become a work register. */ - -static void mir_spill_register( - compiler_context *ctx, - struct lcra_state *l, - unsigned *spill_count) -{ - unsigned spill_index = ctx->temp_count; - - /* Our first step is to calculate spill cost to figure out the best - * spill node. All nodes are equal in spill cost, but we can't spill - * nodes written to from an unspill */ - - unsigned *cost = calloc(ctx->temp_count, sizeof(cost[0])); - - mir_foreach_instr_global(ctx, ins) { - if (ins->dest < ctx->temp_count) - cost[ins->dest]++; - - mir_foreach_src(ins, s) { - if (ins->src[s] < ctx->temp_count) - cost[ins->src[s]]++; - } - } - - for (unsigned i = 0; i < ctx->temp_count; ++i) - lcra_set_node_spill_cost(l, i, cost[i]); - - /* We can't spill any bundles that contain unspills. 
This could be - * optimized to allow use of r27 to spill twice per bundle, but if - * you're at the point of optimizing spilling, it's too late. - * - * We also can't double-spill. */ - - mir_foreach_block(ctx, block) { - mir_foreach_bundle_in_block(block, bun) { - bool no_spill = false; - - for (unsigned i = 0; i < bun->instruction_count; ++i) { - no_spill |= bun->instructions[i]->no_spill; - - if (bun->instructions[i]->no_spill) { - mir_foreach_src(bun->instructions[i], s) { - unsigned src = bun->instructions[i]->src[s]; - - if (src < ctx->temp_count) - lcra_set_node_spill_cost(l, src, -1); - } - } - } - - if (!no_spill) - continue; - - for (unsigned i = 0; i < bun->instruction_count; ++i) { - unsigned dest = bun->instructions[i]->dest; - if (dest < ctx->temp_count) - lcra_set_node_spill_cost(l, dest, -1); - } - } - } - - int spill_node = lcra_get_best_spill_node(l); - - if (spill_node < 0) { - mir_print_shader(ctx); - assert(0); - } - - /* We have a spill node, so check the class. Work registers - * legitimately spill to TLS, but special registers just spill to work - * registers */ - - bool is_special = l->class[spill_node] != REG_CLASS_WORK; - bool is_special_w = l->class[spill_node] == REG_CLASS_TEXW; - - /* Allocate TLS slot (maybe) */ - unsigned spill_slot = !is_special ? (*spill_count)++ : 0; - - /* For TLS, replace all stores to the spilled node. For - * special reads, just keep as-is; the class will be demoted - * implicitly. 
For special writes, spill to a work register */ - - if (!is_special || is_special_w) { - if (is_special_w) - spill_slot = spill_index++; - - mir_foreach_block(ctx, block) { - mir_foreach_instr_in_block_safe(block, ins) { - if (ins->dest != spill_node) continue; - - midgard_instruction st; - - if (is_special_w) { - st = v_mov(spill_node, spill_slot); - st.no_spill = true; - } else { - ins->dest = SSA_FIXED_REGISTER(26); - ins->no_spill = true; - st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask); - } - - /* Hint: don't rewrite this node */ - st.hint = true; - - mir_insert_instruction_after_scheduled(ctx, block, ins, st); - - if (!is_special) - ctx->spills++; - } - } - } - - /* For special reads, figure out how many bytes we need */ - unsigned read_bytemask = 0; - - mir_foreach_instr_global_safe(ctx, ins) { - read_bytemask |= mir_bytemask_of_read_components(ins, spill_node); - } - - /* Insert a load from TLS before the first consecutive - * use of the node, rewriting to use spilled indices to - * break up the live range. Or, for special, insert a - * move. Ironically the latter *increases* register - * pressure, but the two uses of the spilling mechanism - * are somewhat orthogonal. (special spilling is to use - * work registers to back special registers; TLS - * spilling is to use memory to back work registers) */ - - mir_foreach_block(ctx, block) { - bool consecutive_skip = false; - unsigned consecutive_index = 0; - - mir_foreach_instr_in_block(block, ins) { - /* We can't rewrite the moves used to spill in the - * first place. These moves are hinted. 
*/ - if (ins->hint) continue; - - if (!mir_has_arg(ins, spill_node)) { - consecutive_skip = false; - continue; - } - - if (consecutive_skip) { - /* Rewrite */ - mir_rewrite_index_src_single(ins, spill_node, consecutive_index); - continue; - } - - if (!is_special_w) { - consecutive_index = ++spill_index; - - midgard_instruction *before = ins; - - /* TODO: Remove me I'm a fossil */ - if (ins->type == TAG_ALU_4 && OP_IS_CSEL(ins->alu.op)) - before = mir_prev_op(before); - - midgard_instruction st; - - if (is_special) { - /* Move */ - st = v_mov(spill_node, consecutive_index); - st.no_spill = true; - } else { - /* TLS load */ - st = v_load_store_scratch(consecutive_index, spill_slot, false, 0xF); - } - - /* Mask the load based on the component count - * actually needed to prevent RA loops */ - - st.mask = mir_from_bytemask(read_bytemask, midgard_reg_mode_32); - - mir_insert_instruction_before_scheduled(ctx, block, before, st); - // consecutive_skip = true; - } else { - /* Special writes already have their move spilled in */ - consecutive_index = spill_slot; - } - - - /* Rewrite to use */ - mir_rewrite_index_src_single(ins, spill_node, consecutive_index); - - if (!is_special) - ctx->fills++; - } - } - - /* Reset hints */ - - mir_foreach_instr_global(ctx, ins) { - ins->hint = false; - } - - free(cost); -} - void schedule_program(compiler_context *ctx) { - struct lcra_state *l = NULL; - bool spilled = false; - int iter_count = 1000; /* max iterations */ - - /* Number of 128-bit slots in memory we've spilled into */ - unsigned spill_count = 0; - midgard_promote_uniforms(ctx, 16); - /* Must be lowered right before RA */ + /* Must be lowered right before scheduling */ mir_squeeze_index(ctx); mir_lower_special_reads(ctx); mir_squeeze_index(ctx); @@ -1420,34 +1116,4 @@ schedule_program(compiler_context *ctx) schedule_block(ctx, block); } - mir_create_pipeline_registers(ctx); - - do { - if (spilled) - mir_spill_register(ctx, l, &spill_count); - - mir_squeeze_index(ctx); - 
mir_invalidate_liveness(ctx); - - if (l) { - lcra_free(l); - l = NULL; - } - - l = allocate_registers(ctx, &spilled); - } while(spilled && ((iter_count--) > 0)); - - if (iter_count <= 0) { - fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n"); - assert(0); - } - - /* Report spilling information. spill_count is in 128-bit slots (vec4 x - * fp32), but tls_size is in bytes, so multiply by 16 */ - - ctx->tls_size = spill_count * 16; - - install_registers(ctx, l); - - lcra_free(l); } diff --git a/src/panfrost/midgard/mir_squeeze.c b/src/panfrost/midgard/mir_squeeze.c new file mode 100644 index 00000000000..e5bf078b344 --- /dev/null +++ b/src/panfrost/midgard/mir_squeeze.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors (Collabora): + * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> + */ + +#include "compiler.h" + +/* When we're 'squeezing down' the values in the IR, we maintain a hash + * as such */ + +static unsigned +find_or_allocate_temp(compiler_context *ctx, unsigned hash) +{ + if (hash >= SSA_FIXED_MINIMUM) + return hash; + + unsigned temp = (uintptr_t) _mesa_hash_table_u64_search( + ctx->hash_to_temp, hash + 1); + + if (temp) + return temp - 1; + + /* If no temp is found, allocate one */ + temp = ctx->temp_count++; + ctx->max_hash = MAX2(ctx->max_hash, hash); + + _mesa_hash_table_u64_insert(ctx->hash_to_temp, + hash + 1, (void *) ((uintptr_t) temp + 1)); + + return temp; +} + +/* Reassigns numbering to get rid of gaps in the indices and to prioritize + * smaller register classes */ + +void +mir_squeeze_index(compiler_context *ctx) +{ + /* Reset */ + ctx->temp_count = 0; + /* TODO don't leak old hash_to_temp */ + ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL); + + /* We need to prioritize texture registers on older GPUs so we don't + * fail RA trying to assign to work registers r0/r1 when a work + * register is already there */ + + mir_foreach_instr_global(ctx, ins) { + if (ins->type == TAG_TEXTURE_4) + ins->dest = find_or_allocate_temp(ctx, ins->dest); + } + + mir_foreach_instr_global(ctx, ins) { + if (ins->type != TAG_TEXTURE_4) + ins->dest = find_or_allocate_temp(ctx, ins->dest); + + for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) + ins->src[i] = find_or_allocate_temp(ctx, ins->src[i]); + } +} |