summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2017-09-06 18:31:11 -0700
committerJason Ekstrand <[email protected]>2017-11-07 10:37:52 -0800
commit0d905597fe2997c89022c76cdf84dc4fba5eb055 (patch)
treef0a2b53d59974fd71ec1e785da755664812bba14
parentfcd4adb9d08094520fb8d118d3448b04c6ec1fd1 (diff)
intel/fs: Be more explicit about our placement of [un]zip
Before, we were careful to place the zip after the last of the split instructions but did unzip on-demand. This changes things so that the unzips go before all of the split instructions and the zip comes explicitly after all the split instructions. As a side-effect of this change, we now emit the split instructions from highest SIMD group to lowest instead of low to high. We could have kept the old behavior, but it shouldn't matter and this made the code easier. Reviewed-by: Iago Toral Quiroga <[email protected]> Cc: [email protected]
-rw-r--r--src/intel/compiler/brw_fs.cpp20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index befe421d214..965eb86f65e 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5196,6 +5196,20 @@ fs_visitor::lower_simd_width()
assert(!inst->writes_accumulator && !inst->mlen);
+ /* Inserting the zip, unzip, and duplicated instructions in all of
+ * the right spots is somewhat tricky. All of the unzip and any
+ * instructions from the zip which unzip the destination prior to
+ * writing need to happen before all of the per-group instructions
+ * and the zip instructions need to happen after. In order to sort
+ * this all out, we insert the unzip instructions before \p inst,
+ * insert the per-group instructions after \p inst (i.e. before
+ * inst->next), and insert the zip instructions before the
+ * instruction after \p inst. Since we are inserting instructions
+ * after \p inst, inst->next is a moving target and we need to save
+ * it off here so that we insert the zip instructions in the right
+ * place.
+ */
+ exec_node *const after_inst = inst->next;
for (unsigned i = 0; i < n; i++) {
/* Emit a copy of the original instruction with the lowered width.
* If the EOT flag was set throw it away except for the last
@@ -5203,7 +5217,7 @@ fs_visitor::lower_simd_width()
*/
fs_inst split_inst = *inst;
split_inst.exec_size = lower_width;
- split_inst.eot = inst->eot && i == n - 1;
+ split_inst.eot = inst->eot && i == 0;
/* Select the correct channel enables for the i-th group, then
* transform the sources and destination and emit the lowered
@@ -5215,11 +5229,11 @@ fs_visitor::lower_simd_width()
split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j);
split_inst.dst = emit_zip(lbld.at(block, inst),
- lbld.at(block, inst->next), inst);
+ lbld.at(block, after_inst), inst);
split_inst.size_written =
split_inst.dst.component_size(lower_width) * dst_size;
- lbld.emit(split_inst);
+ lbld.at(block, inst->next).emit(split_inst);
}
inst->remove(block);