summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2014-06-28 16:08:39 -0700
committerKenneth Graunke <[email protected]>2014-08-12 13:39:25 -0700
commitaafdf9eef481a77810258b828e2a0b4e3c0aa696 (patch)
treee25851985586a25b73e9d390447b5a10b3c07d8a
parent849046b8429f690fcc9eb7c31e193b467dd97e1a (diff)
i965/eu: Emulate F32TO16 and F16TO32 on Broadwell.
When we combine the Gen4-7 and Gen8+ generators, we'll need to handle half float packing/unpacking functions somehow. The Gen8+ generator code today just emulates the behavior of the Gen7 F32TO16/F16TO32 instructions, including the align16 mode bugs.

Rather than messing with fs_generator/vec4_generator, I decided to just emulate the instructions at the brw_eu_emit.c layer.

v2: Change gen >= 7 asserts to gen == 7 (suggested by Chris Forbes). Fix regressions on Haswell in VS tests due to type assertions.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Chris Forbes <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c52
1 files changed, 50 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index a1acd785743..4d1d6ce510b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1004,8 +1004,6 @@ ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
-ALU1(F32TO16)
-ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
@@ -1110,6 +1108,56 @@ brw_MUL(struct brw_compile *p, struct brw_reg dest,
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
+/**
+ * Emit a 32-bit float to 16-bit float conversion of \p src into \p dst.
+ *
+ * On Gen7 the native F32TO16 opcode is emitted.  On Gen8 and later the
+ * conversion is expressed as a MOV whose destination is retyped to HF; in
+ * Align16 mode that MOV is preceded by a MOV of zero to the UD-typed
+ * destination.
+ *
+ * The destination type is asserted to be UD in Align16 mode, and one of
+ * W, UW or HF otherwise.
+ *
+ * \return the value returned by the last brw_MOV()/brw_alu1() call made.
+ */
+brw_inst *
+brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{
+   const struct brw_context *brw = p->brw;
+   const bool is_align16 =
+      brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   /* The accepted destination type depends on the current access mode. */
+   assert(is_align16 ? dst.type == BRW_REGISTER_TYPE_UD
+                     : (dst.type == BRW_REGISTER_TYPE_W ||
+                        dst.type == BRW_REGISTER_TYPE_UW ||
+                        dst.type == BRW_REGISTER_TYPE_HF));
+
+   if (brw->gen < 8) {
+      /* Only Gen7 is expected to reach the native-opcode path. */
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+
+   if (is_align16) {
+      /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
+       * emit_pack_half_2x16 method.)
+       */
+      brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
+   }
+
+   /* Gen8+: a MOV to an HF-typed destination stands in for F32TO16. */
+   return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+}
+
+/**
+ * Emit a 16-bit float to 32-bit float conversion of \p src into \p dst.
+ *
+ * On Gen7 the native F16TO32 opcode is emitted.  On Gen8 and later the
+ * conversion is expressed as a MOV whose source is retyped to HF.
+ *
+ * The source type is asserted to be UD in Align16 mode, and one of
+ * W, UW or HF otherwise.
+ *
+ * \return the value returned by the brw_MOV()/brw_alu1() call made.
+ */
+brw_inst *
+brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{
+   const struct brw_context *brw = p->brw;
+   const bool is_align16 =
+      brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   /* The accepted source type depends on the current access mode. */
+   assert(is_align16 ? src.type == BRW_REGISTER_TYPE_UD
+                     : (src.type == BRW_REGISTER_TYPE_W ||
+                        src.type == BRW_REGISTER_TYPE_UW ||
+                        src.type == BRW_REGISTER_TYPE_HF));
+
+   if (brw->gen < 8) {
+      /* Only Gen7 is expected to reach the native-opcode path. */
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
+   }
+
+   /* Gen8+: a MOV from an HF-typed source stands in for F16TO32. */
+   return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
+}
+
void brw_NOP(struct brw_compile *p)
{