diff options
author | Francisco Jerez <[email protected]> | 2015-02-04 18:08:47 +0200 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2015-02-19 14:06:42 +0200 |
commit | 437d401e6398eebc2ecd061650d16d1ad2d947f1 (patch) | |
tree | 2fc6cec461f640a1a5434caface4b7b30d54da7e | |
parent | 509f58740c721e1a8cc8ec4e556af3ed993f927e (diff) |
i965/gen8: Fix F32TO16 in vec4 mode if the source and destination registers alias.
We cannot zero out the destination register before the conversion if it
overlaps with the source, because that would overwrite the source before
it is read. Use an Align1 instruction instead to zero out the high 16
bits after the conversion to half float.
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 36 |
1 file changed, 27 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 308b305458e..d9e01fdd9bd 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1148,7 +1148,15 @@ brw_inst * brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) { const struct brw_context *brw = p->brw; - bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16; + const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16; + /* The F32TO16 instruction doesn't support 32-bit destination types in + * Align1 mode, and neither does the Gen8 implementation in terms of a + * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as + * an undocumented feature. + */ + const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD && + brw->gen >= 8); + brw_inst *inst; if (align16) { assert(dst.type == BRW_REGISTER_TYPE_UD); @@ -1158,18 +1166,28 @@ brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) dst.type == BRW_REGISTER_TYPE_HF); } + brw_push_insn_state(p); + + if (needs_zero_fill) { + brw_set_default_access_mode(p, BRW_ALIGN_1); + dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2); + } + if (brw->gen >= 8) { - if (align16) { - /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's - * emit_pack_half_2x16 method.) - */ - brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); - } - return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src); + inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src); } else { assert(brw->gen == 7); - return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src); + inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src); + } + + if (needs_zero_fill) { + brw_inst_set_no_dd_clear(brw, inst, true); + inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u)); + brw_inst_set_no_dd_check(brw, inst, true); } + + brw_pop_insn_state(p); + return inst; } brw_inst * |