summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2017-09-01 23:24:15 -0700
committerJason Ekstrand <[email protected]>2017-11-07 10:37:52 -0800
commitdef013a863558a1f4735d82ef3dfa0f8261fa743 (patch)
treea0b1ece059fc1e9fbea3135a471ec843e1d50e6e /src/intel/compiler
parent0d905597fe2997c89022c76cdf84dc4fba5eb055 (diff)
intel/fs: Use ANY/ALL32 predicates in SIMD32
We have ANY/ALL32 predicates and, for the most part, they work just fine. (See the next commit for more details.) Also, due to the way that flag registers are handled in hardware, instruction splitting is able to split the CMP correctly. Specifically, the hardware looks at the execution group and knows to shift its flag usage up correctly so a 2H instruction will write to f0.1 instead of f0.0. Reviewed-by: Matt Turner <[email protected]> Cc: [email protected]
Diffstat (limited to 'src/intel/compiler')
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp42
1 files changed, 30 insertions, 12 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index bb153cabbe8..f29d4e809ce 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4199,12 +4199,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* dead channels from affecting the result, we initialize the flag with
* with the identity value for the logical operation.
*/
- ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+ }
bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
bld.MOV(dest, brw_imm_d(-1));
- set_predicate(dispatch_width == 8 ?
- BRW_PREDICATE_ALIGN1_ANY8H :
- BRW_PREDICATE_ALIGN1_ANY16H,
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H :
+ BRW_PREDICATE_ALIGN1_ANY32H,
bld.SEL(dest, dest, brw_imm_d(0)));
break;
}
@@ -4215,12 +4221,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* dead channels from affecting the result, we initialize the flag with
* with the identity value for the logical operation.
*/
- ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xffffffff));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ }
bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
bld.MOV(dest, brw_imm_d(-1));
- set_predicate(dispatch_width == 8 ?
- BRW_PREDICATE_ALIGN1_ALL8H :
- BRW_PREDICATE_ALIGN1_ALL16H,
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
+ BRW_PREDICATE_ALIGN1_ALL32H,
bld.SEL(dest, dest, brw_imm_d(0)));
break;
}
@@ -4233,12 +4245,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* dead channels from affecting the result, we initialize the flag with
* with the identity value for the logical operation.
*/
- ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xffffffff));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ }
bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
bld.MOV(dest, brw_imm_d(-1));
- set_predicate(dispatch_width == 8 ?
- BRW_PREDICATE_ALIGN1_ALL8H :
- BRW_PREDICATE_ALIGN1_ALL16H,
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
+ BRW_PREDICATE_ALIGN1_ALL32H,
bld.SEL(dest, dest, brw_imm_d(0)));
break;
}