diff options
author | Iago Toral Quiroga <itoral@igalia.com> | 2015-03-17 10:48:04 +0100 |
---|---|---|
committer | Iago Toral Quiroga <itoral@igalia.com> | 2015-04-01 15:35:23 +0200 |
commit | 3818dfcf3c2d03809774bba613d7dd92752b36db (patch) | |
tree | 63ee1336fea0fe271ee6ee86b3e09c58f61ea8c1 /src/mesa/drivers/dri | |
parent | e3252defd2eabb6bcc8eb14f16d534e964fd8884 (diff) |
i965: Handle scratch accesses where reladdr also points to scratch space
This is a problem when we have IR like this:
(array_ref (var_ref temps) (swiz x (expression ivec4 bitcast_f2i
(swiz xxxx (array_ref (var_ref temps) (constant int (2)) ) )) )) ) )
where we are indexing an array with the result of an expression that
accesses the same array.
In this scenario, temps will be moved to scratch space and we will need
to add scratch reads/writes for all accesses to temps, however, the
current implementation does not consider the case where a reladdr pointer
(obtained by indexing into temps trough a expression) points to a register
that is also stored in scratch space (as in this case, where the expression
used to index temps access temps[2]), and thus, requires a scratch read
before it is accessed.
v2 (Francisco Jerez):
- Handle also recursive reladdr addressing.
- Do not memcpy dst_reg into src_reg when rewriting reladdr.
v3 (Francisco Jerez):
- Reduce complexity by moving recursive reladdr scratch access handling
to a separate recursive function.
- Do not skip demoting reladdr index registers to scratch space if the
top level GRF has already been visited.
v4 (Francisco Jerez)
- Remove redundant checks.
- Simplify code by making emit_resolve_reladdr return a register with
the original src data except for reg, reg_offset and reladdr.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89508
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 100 |
2 files changed, 76 insertions, 26 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 33297aea827..6ec00d5ffd3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -367,6 +367,8 @@ public: dst_reg dst, src_reg orig_src, int base_offset); + src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, + vec4_instruction *inst, src_reg src); bool try_emit_mad(ir_expression *ir); bool try_emit_b2f_of_compare(ir_expression *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 26a3b9f4956..ca1a9958b5b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3352,6 +3352,39 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, } /** + * Checks if \p src and/or \p src.reladdr require a scratch read, and if so, + * adds the scratch read(s) before \p inst. The function also checks for + * recursive reladdr scratch accesses, issuing the corresponding scratch + * loads and rewriting reladdr references accordingly. + * + * \return \p src if it did not require a scratch load, otherwise, the + * register holding the result of the scratch load that the caller should + * use to rewrite src. + */ +src_reg +vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block, + vec4_instruction *inst, src_reg src) +{ + /* Resolve recursive reladdr scratch access by calling ourselves + * with src.reladdr + */ + if (src.reladdr) + *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst, + *src.reladdr); + + /* Now handle scratch access on src */ + if (src.file == GRF && scratch_loc[src.reg] != -1) { + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]); + src.reg = temp.reg; + src.reg_offset = temp.reg_offset; + src.reladdr = NULL; + } + + return src; +} + +/** * We can't generally support array access in GRF space, because a * single instruction's destination can only span 2 contiguous * registers. So, we send all GRF arrays that get variable index @@ -3368,20 +3401,31 @@ vec4_visitor::move_grf_array_access_to_scratch() * scratch. */ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { - if (inst->dst.file == GRF && inst->dst.reladdr && - scratch_loc[inst->dst.reg] == -1) { - scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[inst->dst.reg]; + if (inst->dst.file == GRF && inst->dst.reladdr) { + if (scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[inst->dst.reg]; + } + + for (src_reg *iter = inst->dst.reladdr; + iter->reladdr; + iter = iter->reladdr) { + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { + scratch_loc[iter->reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[iter->reg]; + } + } } for (int i = 0 ; i < 3; i++) { - src_reg *src = &inst->src[i]; - - if (src->file == GRF && src->reladdr && - scratch_loc[src->reg] == -1) { - scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[src->reg]; - } + for (src_reg *iter = &inst->src[i]; + iter->reladdr; + iter = iter->reladdr) { + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { + scratch_loc[iter->reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[iter->reg]; + } + } } } @@ -3395,23 +3439,27 @@ vec4_visitor::move_grf_array_access_to_scratch() base_ir = inst->ir; current_annotation = inst->annotation; - if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { - emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); - } - - for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) - continue; - - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + /* First handle scratch access on the dst. Notice we have to handle + * the case where the dst's reladdr also points to scratch space. + */ + if (inst->dst.reladdr) + *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst, + *inst->dst.reladdr); - emit_scratch_read(block, inst, temp, inst->src[i], - scratch_loc[inst->src[i].reg]); + /* Now that we have handled any (possibly recursive) reladdr scratch + * accesses for dst we can safely do the scratch write for dst itself + */ + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) + emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); - inst->src[i].file = temp.file; - inst->src[i].reg = temp.reg; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; + /* Now handle scratch access on any src. In this case, since inst->src[i] + * already is a src_reg, we can just call emit_resolve_reladdr with + * inst->src[i] and it will take care of handling scratch loads for + * both src and src.reladdr (recursively). + */ + for (int i = 0 ; i < 3; i++) { + inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst, + inst->src[i]); } } } |