diff options
author | Matt Turner <[email protected]> | 2017-06-14 14:47:19 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2018-02-28 11:15:47 -0800 |
commit | 432674ce93ceee2abd7e0cc4171bc36a499d4c1f (patch) | |
tree | 997367c0830f7f3651ddb865d82035ba822eb7da /src/intel/compiler | |
parent | b5d8781e19559a8f9850f1a900ef93ffa3617faa (diff) |
intel/compiler/fs: Implement FS_OPCODE_LINTERP with MADs on Gen11+
The PLN instruction no longer exists on Gen11+. Its functionality is now
implemented with a pair of MAD instructions per 8 channels, using the
accumulator with the new native-float (NF) type as the intermediate. Instead of
pln(16) r20.0<1>:F r10.4<0;1,0>:F r4.0<8;8,1>:F
we now have
mad(8) acc0<1>:NF r10.7<0;1,0>:F r4.0<8;8,1>:F r10.4<0;1,0>:F
mad(8) r20.0<1>:F acc0<8;8,1>:NF r5.0<8;8,1>:F r10.5<0;1,0>:F
mad(8) acc0<1>:NF r10.7<0;1,0>:F r6.0<8;8,1>:F r10.4<0;1,0>:F
mad(8) r21.0<1>:F acc0<8;8,1>:NF r7.0<8;8,1>:F r10.5<0;1,0>:F
The four MAD instructions above cover the SIMD16 case; in the SIMD8 case only
the first pair of MAD instructions is emitted.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 48 |
2 files changed, 46 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index ec871e5aa75..a96fe43556e 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -968,7 +968,7 @@ ALU2(DP4) ALU2(DPH) ALU2(DP3) ALU2(DP2) -ALU3F(MAD) +ALU3(MAD) ALU3F(LRP) ALU1(BFREV) ALU3(BFE) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 3abd7cf5388..736b3b5fba0 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -673,10 +673,52 @@ fs_generator::generate_linterp(fs_inst *inst, struct brw_reg delta_x = src[0]; struct brw_reg delta_y = offset(src[0], inst->exec_size / 8); struct brw_reg interp = src[1]; - brw_inst *i[2]; + brw_inst *i[4]; - if (devinfo->has_pln && - (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) { + if (devinfo->gen >= 11) { + struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_NF); + struct brw_reg dwP = suboffset(interp, 0); + struct brw_reg dwQ = suboffset(interp, 1); + struct brw_reg dwR = suboffset(interp, 3); + + brw_set_default_exec_size(p, BRW_EXECUTE_8); + + if (inst->exec_size == 8) { + i[0] = brw_MAD(p, acc, dwR, offset(delta_x, 0), dwP); + i[1] = brw_MAD(p, offset(dst, 0), acc, offset(delta_y, 0), dwQ); + + brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod); + + /* brw_set_default_saturate() is called before emitting instructions, + * so the saturate bit is set in each instruction, so we need to unset + * it on the first instruction of each pair. 
+ */ + brw_inst_set_saturate(p->devinfo, i[0], false); + } else { + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + i[0] = brw_MAD(p, acc, dwR, offset(delta_x, 0), dwP); + i[1] = brw_MAD(p, offset(dst, 0), acc, offset(delta_x, 1), dwQ); + + brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); + i[2] = brw_MAD(p, acc, dwR, offset(delta_y, 0), dwP); + i[3] = brw_MAD(p, offset(dst, 1), acc, offset(delta_y, 1), dwQ); + + brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod); + brw_inst_set_cond_modifier(p->devinfo, i[3], inst->conditional_mod); + + /* brw_set_default_saturate() is called before emitting instructions, + * so the saturate bit is set in each instruction, so we need to unset + * it on the first instruction of each pair. + */ + brw_inst_set_saturate(p->devinfo, i[0], false); + brw_inst_set_saturate(p->devinfo, i[2], false); + } + + return true; + } else if (devinfo->has_pln && + (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) { brw_PLN(p, dst, interp, delta_x); return false; |