summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp77
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_live_variables.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp50
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp47
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_allocator.h87
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp43
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp36
18 files changed, 235 insertions, 201 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 200a494c4c9..45870356f93 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -424,7 +424,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
assert(dst.width % 8 == 0);
int regs_written = 4 * (dst.width / 8) * scale;
- fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written),
+ fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
dst.type, dst.width);
inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
@@ -688,7 +688,7 @@ fs_visitor::get_timestamp()
0),
BRW_REGISTER_TYPE_UD));
- fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4);
+ fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
fs_inst *mov = emit(MOV(dst, ts));
/* We want to read the 3 fields we care about even if it's not enabled in
@@ -764,7 +764,7 @@ fs_visitor::emit_shader_time_end()
fs_reg start = shader_start_time;
start.negate = true;
- fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1);
+ fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
emit(ADD(diff, start, shader_end_time));
/* If there were no instructions between the two timestamp gets, the diff
@@ -1029,26 +1029,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
}
}
-int
-fs_visitor::virtual_grf_alloc(int size)
-{
- if (virtual_grf_array_size <= virtual_grf_count) {
- if (virtual_grf_array_size == 0)
- virtual_grf_array_size = 16;
- else
- virtual_grf_array_size *= 2;
- virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
- virtual_grf_array_size);
- }
- virtual_grf_sizes[virtual_grf_count] = size;
- return virtual_grf_count++;
-}
-
fs_reg
fs_visitor::vgrf(const glsl_type *const type)
{
int reg_width = dispatch_width / 8;
- return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width),
+ return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width),
brw_type_for_base_type(type), dispatch_width);
}
@@ -1056,7 +1041,7 @@ fs_reg
fs_visitor::vgrf(int num_components)
{
int reg_width = dispatch_width / 8;
- return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width),
+ return fs_reg(GRF, alloc.allocate(num_components * reg_width),
BRW_REGISTER_TYPE_F, dispatch_width);
}
@@ -1912,14 +1897,14 @@ fs_visitor::assign_vs_urb_setup()
void
fs_visitor::split_virtual_grfs()
{
- int num_vars = this->virtual_grf_count;
+ int num_vars = this->alloc.count;
/* Count the total number of registers */
int reg_count = 0;
int vgrf_to_reg[num_vars];
for (int i = 0; i < num_vars; i++) {
vgrf_to_reg[i] = reg_count;
- reg_count += virtual_grf_sizes[i];
+ reg_count += alloc.sizes[i];
}
/* An array of "split points". For each register slot, this indicates
@@ -1935,14 +1920,14 @@ fs_visitor::split_virtual_grfs()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->dst.file == GRF) {
int reg = vgrf_to_reg[inst->dst.reg];
- for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++)
+ for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++)
split_points[reg + j] = true;
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == GRF) {
int reg = vgrf_to_reg[inst->src[i].reg];
- for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++)
+ for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++)
split_points[reg + j] = true;
}
}
@@ -1988,13 +1973,13 @@ fs_visitor::split_virtual_grfs()
int offset = 1;
/* j > 0 case */
- for (int j = 1; j < virtual_grf_sizes[i]; j++) {
+ for (unsigned j = 1; j < alloc.sizes[i]; j++) {
/* If this is a split point, reset the offset to 0 and allocate a
* new virtual GRF for the previous offset many registers
*/
if (split_points[reg]) {
assert(offset <= MAX_VGRF_SIZE);
- int grf = virtual_grf_alloc(offset);
+ int grf = alloc.allocate(offset);
for (int k = reg - offset; k < reg; k++)
new_virtual_grf[k] = grf;
offset = 0;
@@ -2006,7 +1991,7 @@ fs_visitor::split_virtual_grfs()
/* The last one gets the original register number */
assert(offset <= MAX_VGRF_SIZE);
- virtual_grf_sizes[i] = offset;
+ alloc.sizes[i] = offset;
for (int k = reg - offset; k < reg; k++)
new_virtual_grf[k] = i;
}
@@ -2017,14 +2002,14 @@ fs_visitor::split_virtual_grfs()
reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
inst->dst.reg = new_virtual_grf[reg];
inst->dst.reg_offset = new_reg_offset[reg];
- assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
+ assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == GRF) {
reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
inst->src[i].reg = new_virtual_grf[reg];
inst->src[i].reg_offset = new_reg_offset[reg];
- assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
+ assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
}
}
@@ -2044,7 +2029,7 @@ bool
fs_visitor::compact_virtual_grfs()
{
bool progress = false;
- int remap_table[this->virtual_grf_count];
+ int remap_table[this->alloc.count];
memset(remap_table, -1, sizeof(remap_table));
/* Mark which virtual GRFs are used. */
@@ -2060,7 +2045,7 @@ fs_visitor::compact_virtual_grfs()
/* Compact the GRF arrays. */
int new_index = 0;
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
if (remap_table[i] == -1) {
/* We just found an unused register. This means that we are
* actually going to compact something.
@@ -2068,13 +2053,13 @@ fs_visitor::compact_virtual_grfs()
progress = true;
} else {
remap_table[i] = new_index;
- virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
+ alloc.sizes[new_index] = alloc.sizes[i];
invalidate_live_intervals();
++new_index;
}
}
- this->virtual_grf_count = new_index;
+ this->alloc.count = new_index;
/* Patch all the instructions to use the newly renumbered registers */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
@@ -2458,8 +2443,8 @@ fs_visitor::opt_register_renaming()
bool progress = false;
int depth = 0;
- int remap[virtual_grf_count];
- memset(remap, -1, sizeof(int) * virtual_grf_count);
+ int remap[alloc.count];
+ memset(remap, -1, sizeof(int) * alloc.count);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) {
@@ -2483,12 +2468,12 @@ fs_visitor::opt_register_renaming()
if (depth == 0 &&
inst->dst.file == GRF &&
- virtual_grf_sizes[inst->dst.reg] == inst->dst.width / 8 &&
+ alloc.sizes[inst->dst.reg] == inst->dst.width / 8 &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
- remap[dst] = virtual_grf_alloc(inst->dst.width / 8);
+ remap[dst] = alloc.allocate(inst->dst.width / 8);
inst->dst.reg = remap[dst];
progress = true;
}
@@ -3030,7 +3015,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
*/
if (brw->gen >= 9) {
payload.reg_offset++;
- virtual_grf_sizes[payload.reg] = 2;
+ alloc.sizes[payload.reg] = 2;
}
/* This is actually going to be a MOV, but since only the first dword
@@ -3071,11 +3056,11 @@ fs_visitor::lower_load_payload()
{
bool progress = false;
- int vgrf_to_reg[virtual_grf_count];
+ int vgrf_to_reg[alloc.count];
int reg_count = 16; /* Leave room for MRF */
- for (int i = 0; i < virtual_grf_count; ++i) {
+ for (unsigned i = 0; i < alloc.count; ++i) {
vgrf_to_reg[i] = reg_count;
- reg_count += virtual_grf_sizes[i];
+ reg_count += alloc.sizes[i];
}
struct {
@@ -3239,7 +3224,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "vgrf%d", inst->dst.reg);
if (inst->dst.width != dispatch_width)
fprintf(file, "@%d", inst->dst.width);
- if (virtual_grf_sizes[inst->dst.reg] != inst->dst.width / 8 ||
+ if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 ||
inst->dst.subreg_offset)
fprintf(file, "+%d.%d",
inst->dst.reg_offset, inst->dst.subreg_offset);
@@ -3299,7 +3284,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "vgrf%d", inst->src[i].reg);
if (inst->src[i].width != dispatch_width)
fprintf(file, "@%d", inst->src[i].width);
- if (virtual_grf_sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
+ if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
inst->src[i].subreg_offset)
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
@@ -3550,9 +3535,9 @@ fs_visitor::calculate_register_pressure()
regs_live_at_ip = rzalloc_array(mem_ctx, int, num_instructions);
- for (int reg = 0; reg < virtual_grf_count; reg++) {
+ for (unsigned reg = 0; reg < alloc.count; reg++) {
for (int ip = virtual_grf_start[reg]; ip <= virtual_grf_end[reg]; ip++)
- regs_live_at_ip[ip] += virtual_grf_sizes[reg];
+ regs_live_at_ip[ip] += alloc.sizes[reg];
}
}
@@ -3640,7 +3625,7 @@ fs_visitor::fixup_3src_null_dest()
{
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
if (inst->is_3src() && inst->dst.is_null()) {
- inst->dst = fs_reg(GRF, virtual_grf_alloc(dispatch_width / 8),
+ inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b95e2c03e13..2c9b705c07b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -321,7 +321,6 @@ public:
void init();
fs_reg *variable_storage(ir_variable *var);
- int virtual_grf_alloc(int size);
fs_reg vgrf(const glsl_type *const type);
fs_reg vgrf(int num_components);
void import_uniforms(fs_visitor *v);
@@ -643,9 +642,6 @@ public:
int *param_size;
- int *virtual_grf_sizes;
- int virtual_grf_count;
- int virtual_grf_array_size;
int *virtual_grf_start;
int *virtual_grf_end;
brw::fs_live_variables *live_intervals;
@@ -679,7 +675,7 @@ public:
bool do_dual_src;
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
- int max_grf;
+ unsigned max_grf;
fs_reg *fp_temp_regs;
fs_reg *fp_input_regs;
@@ -730,7 +726,7 @@ public:
fs_reg shader_start_time;
fs_reg userplane[MAX_CLIP_PLANES];
- int grf_used;
+ unsigned grf_used;
bool spilled_any_registers;
const unsigned dispatch_width; /**< 8 or 16 */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 11cb327614c..ae069bb757a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -224,7 +224,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
assert(written % dst_width == 0);
fs_reg orig_dst = entry->generator->dst;
- fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+ fs_reg tmp = fs_reg(GRF, alloc.allocate(written),
orig_dst.type, orig_dst.width);
entry->tmp = tmp;
entry->generator->dst = tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 189a119025d..968219bc074 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -278,17 +278,17 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
{
mem_ctx = ralloc_context(NULL);
- num_vgrfs = v->virtual_grf_count;
+ num_vgrfs = v->alloc.count;
num_vars = 0;
var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs);
for (int i = 0; i < num_vgrfs; i++) {
var_from_vgrf[i] = num_vars;
- num_vars += v->virtual_grf_sizes[i];
+ num_vars += v->alloc.sizes[i];
}
vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars);
for (int i = 0; i < num_vgrfs; i++) {
- for (int j = 0; j < v->virtual_grf_sizes[i]; j++) {
+ for (unsigned j = 0; j < v->alloc.sizes[i]; j++) {
vgrf_from_var[var_from_vgrf[i] + j] = i;
}
}
@@ -344,7 +344,7 @@ fs_visitor::calculate_live_intervals()
if (this->live_intervals)
return;
- int num_vgrfs = this->virtual_grf_count;
+ int num_vgrfs = this->alloc.count;
ralloc_free(this->virtual_grf_start);
ralloc_free(this->virtual_grf_end);
virtual_grf_start = ralloc_array(mem_ctx, int, num_vgrfs);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
index a52f922d959..a9b61aa39e5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
@@ -77,7 +77,7 @@ public:
/**
* Map from any index in block_data to the virtual GRF containing it.
*
- * For virtual_grf_sizes of [1, 2, 3], vgrf_from_var would contain
+ * For alloc.sizes of [1, 2, 3], vgrf_from_var would contain
* [0, 1, 1, 2, 2, 2].
*/
int *vgrf_from_var;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index bcd657b2172..ebe0b12b098 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -31,7 +31,7 @@
#include "glsl/ir_optimization.h"
static void
-assign_reg(int *reg_hw_locations, fs_reg *reg)
+assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
{
if (reg->file == GRF) {
assert(reg->reg_offset >= 0);
@@ -43,17 +43,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg)
void
fs_visitor::assign_regs_trivial()
{
- int hw_reg_mapping[this->virtual_grf_count + 1];
- int i;
+ unsigned hw_reg_mapping[this->alloc.count + 1];
+ unsigned i;
int reg_width = dispatch_width / 8;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
- for (i = 1; i <= this->virtual_grf_count; i++) {
+ for (i = 1; i <= this->alloc.count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
- this->virtual_grf_sizes[i - 1]);
+ this->alloc.sizes[i - 1]);
}
- this->grf_used = hw_reg_mapping[this->virtual_grf_count];
+ this->grf_used = hw_reg_mapping[this->alloc.count];
foreach_block_and_inst(block, fs_inst, inst, cfg) {
assign_reg(hw_reg_mapping, &inst->dst);
@@ -66,7 +66,7 @@ fs_visitor::assign_regs_trivial()
fail("Ran out of regs on trivial allocator (%d/%d)\n",
this->grf_used, max_grf);
} else {
- this->virtual_grf_count = this->grf_used;
+ this->alloc.count = this->grf_used;
}
}
@@ -427,7 +427,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
* live between the start of the program and our last use of the payload
* node.
*/
- for (int j = 0; j < this->virtual_grf_count; j++) {
+ for (unsigned j = 0; j < this->alloc.count; j++) {
/* Note that we use a <= comparison, unlike virtual_grf_interferes(),
* in order to not have to worry about the uniform issue described in
* calculate_live_intervals().
@@ -515,7 +515,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
* that are used as conflicting with all virtual GRFs.
*/
if (mrf_used[i]) {
- for (int j = 0; j < this->virtual_grf_count; j++) {
+ for (unsigned j = 0; j < this->alloc.count; j++) {
ra_add_node_interference(g, first_mrf_node + i, j);
}
}
@@ -533,12 +533,12 @@ fs_visitor::assign_regs(bool allow_spilling)
* for reg_width == 2.
*/
int reg_width = dispatch_width / 8;
- int hw_reg_mapping[this->virtual_grf_count];
+ unsigned hw_reg_mapping[this->alloc.count];
int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width);
int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */
calculate_live_intervals();
- int node_count = this->virtual_grf_count;
+ int node_count = this->alloc.count;
int first_payload_node = node_count;
node_count += payload_node_count;
int first_mrf_hack_node = node_count;
@@ -547,8 +547,8 @@ fs_visitor::assign_regs(bool allow_spilling)
struct ra_graph *g =
ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count);
- for (int i = 0; i < this->virtual_grf_count; i++) {
- unsigned size = this->virtual_grf_sizes[i];
+ for (unsigned i = 0; i < this->alloc.count; i++) {
+ unsigned size = this->alloc.sizes[i];
int c;
assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) &&
@@ -572,7 +572,7 @@ fs_visitor::assign_regs(bool allow_spilling)
ra_set_node_class(g, i, c);
- for (int j = 0; j < i; j++) {
+ for (unsigned j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
@@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling)
* highest register that works.
*/
if (inst->eot) {
- int size = virtual_grf_sizes[inst->src[0].reg];
+ int size = alloc.sizes[inst->src[0].reg];
int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
ra_set_node_reg(g, inst->src[0].reg, reg);
break;
@@ -661,12 +661,12 @@ fs_visitor::assign_regs(bool allow_spilling)
* numbers.
*/
this->grf_used = payload_node_count;
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
int reg = ra_get_node_reg(g, i);
hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg];
this->grf_used = MAX2(this->grf_used,
- hw_reg_mapping[i] + this->virtual_grf_sizes[i]);
+ hw_reg_mapping[i] + this->alloc.sizes[i]);
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
@@ -676,7 +676,7 @@ fs_visitor::assign_regs(bool allow_spilling)
}
}
- this->virtual_grf_count = this->grf_used;
+ this->alloc.count = this->grf_used;
ralloc_free(g);
@@ -747,10 +747,10 @@ int
fs_visitor::choose_spill_reg(struct ra_graph *g)
{
float loop_scale = 1.0;
- float spill_costs[this->virtual_grf_count];
- bool no_spill[this->virtual_grf_count];
+ float spill_costs[this->alloc.count];
+ bool no_spill[this->alloc.count];
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
spill_costs[i] = 0.0;
no_spill[i] = false;
}
@@ -811,7 +811,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
}
}
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
@@ -822,7 +822,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
void
fs_visitor::spill_reg(int spill_reg)
{
- int size = virtual_grf_sizes[spill_reg];
+ int size = alloc.sizes[spill_reg];
unsigned int spill_offset = last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
int spill_base_mrf = dispatch_width > 8 ? 13 : 14;
@@ -862,7 +862,7 @@ fs_visitor::spill_reg(int spill_reg)
int regs_read = inst->regs_read(this, i);
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->src[i].reg_offset);
- fs_reg unspill_dst(GRF, virtual_grf_alloc(regs_read));
+ fs_reg unspill_dst(GRF, alloc.allocate(regs_read));
inst->src[i].reg = unspill_dst.reg;
inst->src[i].reg_offset = 0;
@@ -876,7 +876,7 @@ fs_visitor::spill_reg(int spill_reg)
inst->dst.reg == spill_reg) {
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->dst.reg_offset);
- fs_reg spill_src(GRF, virtual_grf_alloc(inst->regs_written));
+ fs_reg spill_src(GRF, alloc.allocate(inst->regs_written));
inst->dst.reg = spill_src.reg;
inst->dst.reg_offset = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 62788cd3091..09f0faddbdb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -66,7 +66,7 @@ is_nop_mov(const fs_inst *inst)
static bool
is_copy_payload(const fs_visitor *v, const fs_inst *inst)
{
- if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written)
+ if (v->alloc.sizes[inst->src[0].reg] != inst->regs_written)
return false;
fs_reg reg = inst->src[0];
@@ -94,8 +94,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
return false;
}
- if (v->virtual_grf_sizes[inst->src[0].reg] >
- v->virtual_grf_sizes[inst->dst.reg])
+ if (v->alloc.sizes[inst->src[0].reg] >
+ v->alloc.sizes[inst->dst.reg])
return false;
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
@@ -179,7 +179,7 @@ fs_visitor::register_coalesce()
if (reg_from != inst->src[0].reg) {
reg_from = inst->src[0].reg;
- src_size = virtual_grf_sizes[inst->src[0].reg];
+ src_size = alloc.sizes[inst->src[0].reg];
assert(src_size <= MAX_VGRF_SIZE);
assert(inst->src[0].width % 8 == 0);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6cddcf5e7e9..2a36d942838 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1278,7 +1278,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
/* If last_rhs_inst wrote a different number of components than our LHS,
* we can't safely rewrite it.
*/
- if (virtual_grf_sizes[dst.reg] != modify->regs_written)
+ if (alloc.sizes[dst.reg] != modify->regs_written)
return false;
/* Success! Rewrite the instruction. */
@@ -1461,7 +1461,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
* this weirdness around to the expected layout.
*/
orig_dst = dst;
- dst = fs_reg(GRF, virtual_grf_alloc(8), orig_dst.type);
+ dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type);
}
enum opcode opcode;
@@ -1672,7 +1672,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
* need to offset the Sampler State Pointer in the header.
*/
header_present = true;
- sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
length++;
}
@@ -1814,7 +1814,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
else
mlen = length * reg_width;
- fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_F);
emit(LOAD_PAYLOAD(src_payload, sources, length));
@@ -1926,7 +1926,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
* tracking to get the scaling factor.
*/
if (brw->gen < 6 && is_rect) {
- fs_reg dst = fs_reg(GRF, virtual_grf_alloc(coord_components));
+ fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components));
fs_reg src = coordinate;
coordinate = dst;
@@ -1985,7 +1985,7 @@ fs_reg
fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
{
int reg_width = dispatch_width / 8;
- fs_reg payload = fs_reg(GRF, virtual_grf_alloc(components * reg_width),
+ fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width),
BRW_REGISTER_TYPE_F);
fs_reg dest = vgrf(glsl_type::uvec4_type);
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components);
@@ -2986,7 +2986,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4);
- sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Initialize the sample mask in the message header. */
emit(MOV(sources[0], fs_reg(0u)))
->force_writemask_all = true;
@@ -3020,7 +3020,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
}
int mlen = 1 + (length - 1) * reg_width;
- fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD);
emit(LOAD_PAYLOAD(src_payload, sources, length));
@@ -3041,7 +3041,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
- sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Initialize the sample mask in the message header. */
emit(MOV(sources[0], fs_reg(0u)))
->force_writemask_all = true;
@@ -3060,7 +3060,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
emit(MOV(sources[1], offset));
int mlen = 1 + reg_width;
- fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD);
fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2));
@@ -3280,7 +3280,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
int len = 0;
for (unsigned i = 0; i < 4; ++i) {
if (colors_enabled & (1 << i)) {
- dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8),
+ dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
color.type, color.width);
inst = emit(MOV(dst[len], offset(color, i)));
inst->saturate = key->clamp_fragment_color;
@@ -3304,11 +3304,11 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
*/
for (unsigned i = 0; i < 4; ++i) {
if (colors_enabled & (1 << i)) {
- dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+ dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
inst = emit(MOV(dst[i], half(offset(color, i), 0)));
inst->saturate = key->clamp_fragment_color;
- dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+ dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
inst->saturate = key->clamp_fragment_color;
inst->force_sechalf = true;
@@ -3409,7 +3409,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
length += 2;
if (payload.aa_dest_stencil_reg) {
- sources[length] = fs_reg(GRF, virtual_grf_alloc(1));
+ sources[length] = fs_reg(GRF, alloc.allocate(1));
emit(MOV(sources[length],
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
length++;
@@ -3423,7 +3423,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
/* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
* it's unsigned single words, one vgrf is always 16-wide.
*/
- sources[length] = fs_reg(GRF, virtual_grf_alloc(1),
+ sources[length] = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UW, 16);
emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
length++;
@@ -3437,7 +3437,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
length += setup_color_payload(sources + length, this->outputs[0], 0);
} else if (color1.file == BAD_FILE) {
if (src0_alpha.file != BAD_FILE) {
- sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size),
+ sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
src0_alpha.type, src0_alpha.width);
fs_inst *inst = emit(MOV(sources[length], src0_alpha));
inst->saturate = key->clamp_fragment_color;
@@ -3486,7 +3486,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
/* Send from the GRF */
fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
load = emit(LOAD_PAYLOAD(payload, sources, length));
- payload.reg = virtual_grf_alloc(load->regs_written);
+ payload.reg = alloc.allocate(load->regs_written);
payload.width = dispatch_width;
load->dst = payload;
write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
@@ -3655,7 +3655,7 @@ fs_visitor::emit_urb_writes()
* send to terminate the shader. */
if (vue_map->slots_valid == 0) {
- fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD))));
inst->force_writemask_all = true;
@@ -3688,7 +3688,7 @@ fs_visitor::emit_urb_writes()
break;
}
- zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
emit(MOV(zero, fs_reg(0u)));
sources[length++] = zero;
@@ -3742,7 +3742,7 @@ fs_visitor::emit_urb_writes()
* temp register and use that for the payload.
*/
for (int i = 0; i < 4; i++) {
- reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type);
+ reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
src = offset(this->outputs[varying], i);
fs_inst *inst = emit(MOV(reg, src));
inst->saturate = true;
@@ -3769,14 +3769,14 @@ fs_visitor::emit_urb_writes()
emit_shader_time_end();
fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
- fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1),
+ fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
BRW_REGISTER_TYPE_F);
/* We need WE_all on the MOV for the message header (the URB handles)
* so do a MOV to a dummy register and set force_writemask_all on the
* MOV. LOAD_PAYLOAD will preserve that.
*/
- fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1),
+ fs_reg dummy = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UD);
fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD))));
@@ -3892,9 +3892,6 @@ fs_visitor::init()
this->current_annotation = NULL;
this->base_ir = NULL;
- this->virtual_grf_sizes = NULL;
- this->virtual_grf_count = 0;
- this->virtual_grf_array_size = 0;
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_allocator.h b/src/mesa/drivers/dri/i965/brw_ir_allocator.h
new file mode 100644
index 00000000000..b1237ed38e7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_ir_allocator.h
@@ -0,0 +1,87 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_IR_ALLOCATOR_H
+#define BRW_IR_ALLOCATOR_H
+
+#include "main/macros.h"
+
+namespace brw {
+ /**
+ * Simple allocator used to keep track of virtual GRFs.
+ *
+ * Every call to allocate() appends one entry: its size is stored in
+ * sizes[], the running total before the call is stored in offsets[], and
+ * the entry's index is returned. This class has no operation that removes
+ * an entry.
+ *
+ * The default constructor starts with no storage (NULL arrays, all
+ * counters zero); the first allocate() call creates the arrays. The
+ * destructor releases both arrays with free().
+ *
+ * NOTE(review): the results of realloc() in allocate() are used without
+ * a NULL check.
+ *
+ * NOTE(review): no copy constructor or assignment operator is declared
+ * although the destructor frees the arrays, so a copied instance would
+ * presumably share and later double-free them -- confirm that no caller
+ * copies this object.
+ *
+ * NOTE(review): the data members are public, and any code that assigns
+ * to sizes[] or count directly bypasses allocate(), so offsets[] and
+ * total_size are not refreshed by such writes -- confirm nothing depends
+ * on them afterwards.
+ */
+ class simple_allocator {
+ public:
+ simple_allocator() :
+ sizes(NULL), offsets(NULL), count(0), total_size(0), capacity(0)
+ {
+ }
+
+ ~simple_allocator()
+ {
+ free(offsets);
+ free(sizes);
+ }
+
+ unsigned
+ allocate(unsigned size) /* Records one allocation of 'size' units; returns its index. */
+ {
+ if (capacity <= count) { /* Arrays are full (or not created yet): grow both together. */
+ capacity = MAX2(16, capacity * 2); /* Presumably 16 entries first, then doubling (assumes MAX2 yields the larger value). */
+ sizes = (unsigned *)realloc(sizes, capacity * sizeof(unsigned));
+ offsets = (unsigned *)realloc(offsets, capacity * sizeof(unsigned));
+ }
+
+ sizes[count] = size;
+ offsets[count] = total_size; /* Sum of the sizes of all earlier allocate() calls. */
+ total_size += size;
+
+ return count++; /* Index of the entry just written; count then moves to the next free slot. */
+ }
+
+ /**
+ * Array of sizes for each allocation. The allocation unit is up to the
+ * back-end, but it's expected to be one scalar value in the FS back-end
+ * and one vec4 in the VEC4 back-end.
+ *
+ * sizes[i] holds the value passed to the allocate() call that returned i.
+ */
+ unsigned *sizes;
+
+ /**
+ * Array of offsets from the start of the VGRF space in allocation
+ * units.
+ *
+ * offsets[i] is set once, by the allocate() call that returned i, to the
+ * value total_size had before that call.
+ */
+ unsigned *offsets;
+
+ /**
+ * Total number of VGRFs allocated. This is also the index that the next
+ * allocate() call will return.
+ */
+ unsigned count;
+
+ /**
+ * Cumulative size in allocation units, i.e. the sum of every size passed
+ * to allocate() so far.
+ */
+ unsigned total_size;
+
+ private:
+ unsigned capacity; /* Number of entries sizes[] and offsets[] currently have room for. */
+ };
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 40b5715cccd..78666fd222f 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -544,9 +544,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
if (inst->dst.file == GRF) {
if (remaining_grf_uses[inst->dst.reg] == 1)
- benefit += v->virtual_grf_sizes[inst->dst.reg];
+ benefit += v->alloc.sizes[inst->dst.reg];
if (!grf_active[inst->dst.reg])
- benefit -= v->virtual_grf_sizes[inst->dst.reg];
+ benefit -= v->alloc.sizes[inst->dst.reg];
}
for (int i = 0; i < inst->sources; i++) {
@@ -554,9 +554,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
continue;
if (remaining_grf_uses[inst->src[i].reg] == 1)
- benefit += v->virtual_grf_sizes[inst->src[i].reg];
+ benefit += v->alloc.sizes[inst->src[i].reg];
if (!grf_active[inst->src[i].reg])
- benefit -= v->virtual_grf_sizes[inst->src[i].reg];
+ benefit -= v->alloc.sizes[inst->src[i].reg];
}
return benefit;
@@ -1503,7 +1503,7 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
if (mode == SCHEDULE_POST)
grf_count = grf_used;
else
- grf_count = virtual_grf_count;
+ grf_count = alloc.count;
fs_instruction_scheduler sched(this, grf_count, mode);
sched.run(cfg);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 5ad87d6278b..ab3ad60e02b 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -27,6 +27,10 @@
#include "main/compiler.h"
#include "glsl/ir.h"
+#ifdef __cplusplus
+#include "brw_ir_allocator.h"
+#endif
+
#pragma once
enum PACKED register_file {
@@ -172,6 +176,8 @@ public:
gl_shader_stage stage;
+ brw::simple_allocator alloc;
+
virtual void dump_instruction(backend_instruction *inst) = 0;
virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0;
virtual void dump_instructions();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 98fad6c8f87..f2339b399e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1209,7 +1209,7 @@ vec4_visitor::opt_register_coalesce()
void
vec4_visitor::split_virtual_grfs()
{
- int num_vars = this->virtual_grf_count;
+ int num_vars = this->alloc.count;
int new_virtual_grf[num_vars];
bool split_grf[num_vars];
@@ -1217,7 +1217,7 @@ vec4_visitor::split_virtual_grfs()
/* Try to split anything > 0 sized. */
for (int i = 0; i < num_vars; i++) {
- split_grf[i] = this->virtual_grf_sizes[i] != 1;
+ split_grf[i] = this->alloc.sizes[i] != 1;
}
/* Check that the instructions are compatible with the registers we're trying
@@ -1243,13 +1243,13 @@ vec4_visitor::split_virtual_grfs()
if (!split_grf[i])
continue;
- new_virtual_grf[i] = virtual_grf_alloc(1);
- for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
- int reg = virtual_grf_alloc(1);
+ new_virtual_grf[i] = alloc.allocate(1);
+ for (unsigned j = 2; j < this->alloc.sizes[i]; j++) {
+ unsigned reg = alloc.allocate(1);
assert(reg == new_virtual_grf[i] + j - 1);
(void) reg;
}
- this->virtual_grf_sizes[i] = 1;
+ this->alloc.sizes[i] = 1;
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
@@ -1432,7 +1432,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
/* Don't print .0; and only VGRFs have reg_offsets and sizes */
if (inst->src[i].reg_offset != 0 &&
inst->src[i].file == GRF &&
- virtual_grf_sizes[inst->src[i].reg] != 1)
+ alloc.sizes[inst->src[i].reg] != 1)
fprintf(file, ".%d", inst->src[i].reg_offset);
if (inst->src[i].file != IMM) {
@@ -1834,9 +1834,9 @@ vec4_visitor::run()
if (false) {
/* Debug of register spilling: Go spill everything. */
- const int grf_count = virtual_grf_count;
- float spill_costs[virtual_grf_count];
- bool no_spill[virtual_grf_count];
+ const int grf_count = alloc.count;
+ float spill_costs[alloc.count];
+ bool no_spill[alloc.count];
evaluate_spill_costs(spill_costs, no_spill);
for (int i = 0; i < grf_count; i++) {
if (no_spill[i])
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 980544d3dbe..6b710c9bc31 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -275,9 +275,6 @@ public:
const void *base_ir;
const char *current_annotation;
- int *virtual_grf_sizes;
- int virtual_grf_count;
- int virtual_grf_array_size;
int first_non_payload_grf;
unsigned int max_grf;
int *virtual_grf_start;
@@ -285,14 +282,6 @@ public:
brw::vec4_live_variables *live_intervals;
dst_reg userplane[MAX_CLIP_PLANES];
- /**
- * This is the size to be used for an array with an element per
- * reg_offset
- */
- int virtual_grf_reg_count;
- /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
- int *virtual_grf_reg_map;
-
dst_reg *variable_storage(ir_variable *var);
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
@@ -347,7 +336,6 @@ public:
bool run(void);
void fail(const char *msg, ...);
- int virtual_grf_alloc(int size);
void setup_uniform_clipplane_values();
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 638d99a4c92..81567d2b295 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -346,7 +346,7 @@ bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
bool progress = false;
- struct copy_entry entries[virtual_grf_reg_count];
+ struct copy_entry entries[alloc.total_size];
memset(&entries, 0, sizeof(entries));
@@ -375,7 +375,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
inst->src[i].reladdr)
continue;
- int reg = (virtual_grf_reg_map[inst->src[i].reg] +
+ int reg = (alloc.offsets[inst->src[i].reg] +
inst->src[i].reg_offset);
/* Find the regs that each swizzle component came from.
@@ -418,7 +418,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
/* Track available source registers. */
if (inst->dst.file == GRF) {
const int reg =
- virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+ alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;
/* Update our destination's current channel values. For a direct copy,
* the value is the newly propagated source. Otherwise, we don't know
@@ -439,7 +439,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
if (inst->dst.reladdr)
memset(&entries, 0, sizeof(entries));
else {
- for (int i = 0; i < virtual_grf_reg_count; i++) {
+ for (unsigned i = 0; i < alloc.total_size; i++) {
for (int j = 0; j < 4; j++) {
if (is_channel_updated(inst, entries[i].value, j)){
entries[i].value[j] = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index ee50419dc9a..5fb8f3166ce 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -241,7 +241,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
* more -- a sure sign they'll fail operands_match().
*/
if (src->file == GRF) {
- assert((src->reg * 4 + 3) < (virtual_grf_count * 4));
+ assert((unsigned)(src->reg * 4 + 3) < (alloc.count * 4));
int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
virtual_grf_end[src->reg * 4 + 1]),
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 98350691db2..c562b2e6800 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -96,7 +96,7 @@ vec4_live_variables::setup_def_use()
* variable, and thus qualify for being in def[].
*/
if (inst->dst.file == GRF &&
- v->virtual_grf_sizes[inst->dst.reg] == 1 &&
+ v->alloc.sizes[inst->dst.reg] == 1 &&
!inst->predicate) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
@@ -180,7 +180,7 @@ vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
{
mem_ctx = ralloc_context(NULL);
- num_vars = v->virtual_grf_count * 4;
+ num_vars = v->alloc.count * 4;
block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
bitset_words = BITSET_WORDS(num_vars);
@@ -230,14 +230,14 @@ vec4_visitor::calculate_live_intervals()
if (this->live_intervals)
return;
- int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4);
- int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4);
+ int *start = ralloc_array(mem_ctx, int, this->alloc.count * 4);
+ int *end = ralloc_array(mem_ctx, int, this->alloc.count * 4);
ralloc_free(this->virtual_grf_start);
ralloc_free(this->virtual_grf_end);
this->virtual_grf_start = start;
this->virtual_grf_end = end;
- for (int i = 0; i < this->virtual_grf_count * 4; i++) {
+ for (unsigned i = 0; i < this->alloc.count * 4; i++) {
start[i] = MAX_INSTRUCTION;
end[i] = -1;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index e8e2185ac1a..b944d454df6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -45,15 +45,14 @@ assign(unsigned int *reg_hw_locations, backend_reg *reg)
bool
vec4_visitor::reg_allocate_trivial()
{
- unsigned int hw_reg_mapping[this->virtual_grf_count];
- bool virtual_grf_used[this->virtual_grf_count];
- int i;
+ unsigned int hw_reg_mapping[this->alloc.count];
+ bool virtual_grf_used[this->alloc.count];
int next;
/* Calculate which virtual GRFs are actually in use after whatever
* optimization passes have occurred.
*/
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
virtual_grf_used[i] = false;
}
@@ -61,18 +60,18 @@ vec4_visitor::reg_allocate_trivial()
if (inst->dst.file == GRF)
virtual_grf_used[inst->dst.reg] = true;
- for (int i = 0; i < 3; i++) {
+ for (unsigned i = 0; i < 3; i++) {
if (inst->src[i].file == GRF)
virtual_grf_used[inst->src[i].reg] = true;
}
}
hw_reg_mapping[0] = this->first_non_payload_grf;
- next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
- for (i = 1; i < this->virtual_grf_count; i++) {
+ next = hw_reg_mapping[0] + this->alloc.sizes[0];
+ for (unsigned i = 1; i < this->alloc.count; i++) {
if (virtual_grf_used[i]) {
hw_reg_mapping[i] = next;
- next += this->virtual_grf_sizes[i];
+ next += this->alloc.sizes[i];
}
}
prog_data->total_grf = next;
@@ -176,7 +175,7 @@ bool
vec4_visitor::reg_allocate()
{
struct intel_screen *screen = brw->intelScreen;
- unsigned int hw_reg_mapping[virtual_grf_count];
+ unsigned int hw_reg_mapping[alloc.count];
int payload_reg_count = this->first_non_payload_grf;
/* Using the trivial allocator can be useful in debugging undefined
@@ -187,19 +186,19 @@ vec4_visitor::reg_allocate()
calculate_live_intervals();
- int node_count = virtual_grf_count;
+ int node_count = alloc.count;
int first_payload_node = node_count;
node_count += payload_reg_count;
struct ra_graph *g =
ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count);
- for (int i = 0; i < virtual_grf_count; i++) {
- int size = this->virtual_grf_sizes[i];
+ for (unsigned i = 0; i < alloc.count; i++) {
+ int size = this->alloc.sizes[i];
assert(size >= 1 && size <= 2 &&
"Register allocation relies on split_virtual_grfs().");
ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]);
- for (int j = 0; j < i; j++) {
+ for (unsigned j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
@@ -230,12 +229,12 @@ vec4_visitor::reg_allocate()
* numbers.
*/
prog_data->total_grf = payload_reg_count;
- for (int i = 0; i < virtual_grf_count; i++) {
+ for (unsigned i = 0; i < alloc.count; i++) {
int reg = ra_get_node_reg(g, i);
hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg];
prog_data->total_grf = MAX2(prog_data->total_grf,
- hw_reg_mapping[i] + virtual_grf_sizes[i]);
+ hw_reg_mapping[i] + alloc.sizes[i]);
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
@@ -255,9 +254,9 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
{
float loop_scale = 1.0;
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
spill_costs[i] = 0.0;
- no_spill[i] = virtual_grf_sizes[i] != 1;
+ no_spill[i] = alloc.sizes[i] != 1;
}
/* Calculate costs for spilling nodes. Call it a cost of 1 per
@@ -308,12 +307,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
int
vec4_visitor::choose_spill_reg(struct ra_graph *g)
{
- float spill_costs[this->virtual_grf_count];
- bool no_spill[this->virtual_grf_count];
+ float spill_costs[this->alloc.count];
+ bool no_spill[this->alloc.count];
evaluate_spill_costs(spill_costs, no_spill);
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
@@ -324,7 +323,7 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g)
void
vec4_visitor::spill_reg(int spill_reg_nr)
{
- assert(virtual_grf_sizes[spill_reg_nr] == 1);
+ assert(alloc.sizes[spill_reg_nr] == 1);
unsigned int spill_offset = c->last_scratch++;
/* Generate spill/unspill instructions for the objects being spilled. */
@@ -332,7 +331,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
src_reg spill_reg = inst->src[i];
- inst->src[i].reg = virtual_grf_alloc(1);
+ inst->src[i].reg = alloc.allocate(1);
dst_reg temp = dst_reg(inst->src[i]);
emit_scratch_read(block, inst, temp, spill_reg, spill_offset);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e6a7ed06020..7d5221386cb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -616,31 +616,12 @@ type_size(const struct glsl_type *type)
return 0;
}
-int
-vec4_visitor::virtual_grf_alloc(int size)
-{
- if (virtual_grf_array_size <= virtual_grf_count) {
- if (virtual_grf_array_size == 0)
- virtual_grf_array_size = 16;
- else
- virtual_grf_array_size *= 2;
- virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
- virtual_grf_array_size);
- virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
- virtual_grf_array_size);
- }
- virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
- virtual_grf_reg_count += size;
- virtual_grf_sizes[virtual_grf_count] = size;
- return virtual_grf_count++;
-}
-
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type));
+ this->reg = v->alloc.allocate(type_size(type));
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -658,7 +639,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type) * size);
+ this->reg = v->alloc.allocate(type_size(type) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -670,7 +651,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type));
+ this->reg = v->alloc.allocate(type_size(type));
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
@@ -3372,7 +3353,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
void
vec4_visitor::move_grf_array_access_to_scratch()
{
- int scratch_loc[this->virtual_grf_count];
+ int scratch_loc[this->alloc.count];
memset(scratch_loc, -1, sizeof(scratch_loc));
/* First, calculate the set of virtual GRFs that need to be punted
@@ -3383,7 +3364,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
if (inst->dst.file == GRF && inst->dst.reladdr &&
scratch_loc[inst->dst.reg] == -1) {
scratch_loc[inst->dst.reg] = c->last_scratch;
- c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
+ c->last_scratch += this->alloc.sizes[inst->dst.reg];
}
for (int i = 0 ; i < 3; i++) {
@@ -3392,7 +3373,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
if (src->file == GRF && src->reladdr &&
scratch_loc[src->reg] == -1) {
scratch_loc[src->reg] = c->last_scratch;
- c->last_scratch += this->virtual_grf_sizes[src->reg];
+ c->last_scratch += this->alloc.sizes[src->reg];
}
}
}
@@ -3612,11 +3593,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
- this->virtual_grf_sizes = NULL;
- this->virtual_grf_count = 0;
- this->virtual_grf_reg_map = NULL;
- this->virtual_grf_reg_count = 0;
- this->virtual_grf_array_size = 0;
this->live_intervals = NULL;
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;