/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu
 */

#include <stdlib.h> /* for qsort() */
#include <string.h> /* for memmove() and memset() */

#include "toy_compiler.h"
#include "toy_legalize.h"

/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;
   int startpoint;
   int endpoint;

   /*
    * should this interval be assigned a register consecutive to the
    * previous interval's?
    */
   bool consecutive;

   int reg;

   struct list_head list;
};

/**
 * Linear scan.
 */
struct linear_scan {
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   int num_regs;

   struct list_head active_list;
   int *free_regs;
   int num_free_regs;

   int *vrf_mapping;
};

/**
 * Return a chunk of registers to the free register pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int i;

   for (i = 0; i < count; i++)
      ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
}

static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int *reg1 = elem1;
   const int *reg2 = elem2;

   /* in reverse order */
   return (*reg2 - *reg1);
}

/**
 * Allocate a chunk of consecutive registers from the free register pool.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}
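/*
 * A worked example of the chunk search above (values invented for
 * illustration, not taken from any real run): suppose the pool holds
 * { 6, 5, 2, 1, 0 } -- already in reverse order -- and a chunk of
 * count == 2 is requested.  The scan starts at the tail (free_regs[4] == 0),
 * sees that free_regs[3] == 1 continues the run, and returns reg 0 with the
 * pool shrunk to { 6, 5, 2 }.  Had the only consecutive run been { 6, 5 } at
 * the head, the memmove() would instead slide the entries past the chunk
 * toward the front to close the gap, leaving { 2, 1, 0 }.
 */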
/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

   list_addtail(&interval->list, &pos->list);
}

/**
 * Remove an interval from the active list.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}

/**
 * Remove intervals that are no longer active from the active list.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}

/**
 * Spill an interval.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}

/**
 * Spill a range of intervals.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int i;

   for (i = 0; i < count; i++) {
      struct linear_scan_live_interval *interval = &ls->intervals[first + i];

      linear_scan_spill(ls, interval, false);
   }
}

/**
 * Perform linear scan to allocate registers for the intervals.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * BRW_OPCODE_SEND may write to multiple consecutive registers and we
       * need to support that
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                         (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this should and must be the case because of how we initialized
          * the intervals
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}

/**
 * Add a new interval.
 */
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
   if (ls->intervals[vrf].vrf)
      return;

   ls->intervals[vrf].vrf = vrf;
   ls->intervals[vrf].startpoint = pc;
   ls->num_vrfs++;

   if (vrf > ls->max_vrf)
      ls->max_vrf = vrf;
}
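/*
 * To illustrate the consecutive-run grouping in linear_scan_run() (the
 * interval values here are made up): a SEND with a response length of 2
 * produces intervals such as (vrf 3, [4, 9]) and (vrf 4, [4, 9],
 * consecutive).  Because the second interval shares the startpoint and has
 * its consecutive flag set, the two are folded into a single count == 2
 * request to linear_scan_allocate_regs(), so the VRFs land in adjacent
 * registers, e.g. reg 5 and reg 6, as the assert in the loop verifies.
 */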
/**
 * Perform (oversimplified?) live variable analysis.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live
       * throughout the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == BRW_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching BRW_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                                     &tc->instructions, list) {
               if (inst2->marker) {
                  assert(inst2->opcode == BRW_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == BRW_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }

               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == BRW_OPCODE_SEND ||
             inst->opcode == BRW_OPCODE_SENDC) {
            const uint32_t mdesc = inst->src[1].val32;
            /* response length lives in bits 24:20 of the message descriptor */
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < Elements(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}
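/*
 * An illustrative trace of the loop handling above (the pc values are made
 * up): if the BRW_OPCODE_DO marker sets do_pc to 2 and the matching
 * BRW_OPCODE_WHILE ends up at pc 7, every VRF touched at pc 2..7 has its
 * interval widened to [2, 7].  A value defined before the loop and read
 * inside it likewise gets its endpoint pushed out to 7, so it stays live
 * across every iteration rather than expiring mid-loop.
 */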
/**
 * Clean up after performing linear scan.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->vrf_mapping);
   FREE(ls->intervals);
   FREE(ls->free_regs);
}

static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;

   /* make unused elements appear at the end */
   if (!interval1->vrf)
      return 1;
   else if (!interval2->vrf)
      return -1;

   /* sort by startpoints first, and then by vrf */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}

/**
 * Prepare for linear scan.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);

   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}

/**
 * Allocate registers with linear scan.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      linear_scan_cleanup(&ls);
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}

/**
 * Trivially allocate registers.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* VRF numbers start at 1 (0 is the unused sentinel), hence the -1 */
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}

/**
 * Allocate GRF registers to VRF registers.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   /* flip to false to fall back to trivial_allocation() */
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}
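/*
 * Illustrative usage (the GRF range and VRF width below are hypothetical,
 * not taken from any particular caller): a shader stage that reserves GRFs
 * r2..r127 for allocation, with each VRF mapping to a single GRF, would
 * invoke
 *
 *    toy_compiler_allocate_registers(tc, 2, 127, 1);
 *
 * after which every TOY_FILE_VRF operand has been rewritten to a
 * TOY_FILE_GRF operand, or tc_fail() has marked the compilation as failed.
 */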