From 0d905597fe2997c89022c76cdf84dc4fba5eb055 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 6 Sep 2017 18:31:11 -0700 Subject: intel/fs: Be more explicit about our placement of [un]zip Before, we were careful to place the zip after the last of the split instructions but did unzip on-demand. This changes things so that the unzips go before all of the split instructions and the unzip comes explicitly after all the split instructions. As a side-effect of this change, we now emit the split instruction from highest SIMD group to lowest instead of low to high. We could have kept the old behavior, but it shouldn't matter and this made the code easier. Reviewed-by: Iago Toral Quiroga Cc: mesa-stable@lists.freedesktop.org --- src/intel/compiler/brw_fs.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'src/intel/compiler') diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index befe421d214..965eb86f65e 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5196,6 +5196,20 @@ fs_visitor::lower_simd_width() assert(!inst->writes_accumulator && !inst->mlen); + /* Inserting the zip, unzip, and duplicated instructions in all of + * the right spots is somewhat tricky. All of the unzip and any + * instructions from the zip which unzip the destination prior to + * writing need to happen before all of the per-group instructions + * and the zip instructions need to happen after. In order to sort + * this all out, we insert the unzip instructions before \p inst, + * insert the per-group instructions after \p inst (i.e. before + * inst->next), and insert the zip instructions before the + * instruction after \p inst. Since we are inserting instructions + * after \p inst, inst->next is a moving target and we need to save + * it off here so that we insert the zip instructions in the right + * place. + */ + exec_node *const after_inst = inst->next; for (unsigned i = 0; i < n; i++) { /* Emit a copy of the original instruction with the lowered width. * If the EOT flag was set throw it away except for the last @@ -5203,7 +5217,7 @@ fs_visitor::lower_simd_width() */ fs_inst split_inst = *inst; split_inst.exec_size = lower_width; - split_inst.eot = inst->eot && i == n - 1; + split_inst.eot = inst->eot && i == 0; /* Select the correct channel enables for the i-th group, then * transform the sources and destination and emit the lowered @@ -5215,11 +5229,11 @@ fs_visitor::lower_simd_width() split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j); split_inst.dst = emit_zip(lbld.at(block, inst), - lbld.at(block, inst->next), inst); + lbld.at(block, after_inst), inst); split_inst.size_written = split_inst.dst.component_size(lower_width) * dst_size; - lbld.emit(split_inst); + lbld.at(block, inst->next).emit(split_inst); } inst->remove(block); -- cgit v1.2.3