diff options
author | Jason Ekstrand <[email protected]> | 2017-09-06 18:31:11 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-11-07 10:37:52 -0800 |
commit | 0d905597fe2997c89022c76cdf84dc4fba5eb055 (patch) | |
tree | f0a2b53d59974fd71ec1e785da755664812bba14 | |
parent | fcd4adb9d08094520fb8d118d3448b04c6ec1fd1 (diff) |
intel/fs: Be more explicit about our placement of [un]zip
Before, we were careful to place the zip after the last of the split
instructions but did unzip on-demand. This changes things so that the
unzips go before all of the split instructions and the zip comes
explicitly after all the split instructions. As a side-effect of this
change, we now emit the split instructions from highest SIMD group to
lowest instead of low to high. We could have kept the old behavior, but
it shouldn't matter and this made the code easier.
Reviewed-by: Iago Toral Quiroga <[email protected]>
Cc: [email protected]
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 20 |
1 files changed, 17 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index befe421d214..965eb86f65e 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5196,6 +5196,20 @@ fs_visitor::lower_simd_width() assert(!inst->writes_accumulator && !inst->mlen); + /* Inserting the zip, unzip, and duplicated instructions in all of + * the right spots is somewhat tricky. All of the unzip and any + * instructions from the zip which unzip the destination prior to + * writing need to happen before all of the per-group instructions + * and the zip instructions need to happen after. In order to sort + * this all out, we insert the unzip instructions before \p inst, + * insert the per-group instructions after \p inst (i.e. before + * inst->next), and insert the zip instructions before the + * instruction after \p inst. Since we are inserting instructions + * after \p inst, inst->next is a moving target and we need to save + * it off here so that we insert the zip instructions in the right + * place. + */ + exec_node *const after_inst = inst->next; for (unsigned i = 0; i < n; i++) { /* Emit a copy of the original instruction with the lowered width. * If the EOT flag was set throw it away except for the last @@ -5203,7 +5217,7 @@ fs_visitor::lower_simd_width() */ fs_inst split_inst = *inst; split_inst.exec_size = lower_width; - split_inst.eot = inst->eot && i == n - 1; + split_inst.eot = inst->eot && i == 0; /* Select the correct channel enables for the i-th group, then * transform the sources and destination and emit the lowered @@ -5215,11 +5229,11 @@ fs_visitor::lower_simd_width() split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j); split_inst.dst = emit_zip(lbld.at(block, inst), - lbld.at(block, inst->next), inst); + lbld.at(block, after_inst), inst); split_inst.size_written = split_inst.dst.component_size(lower_width) * dst_size; - lbld.emit(split_inst); + lbld.at(block, inst->next).emit(split_inst); } inst->remove(block); |