summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Francisco Jerez <[email protected]> 2015-02-04 18:08:47 +0200
committer: Francisco Jerez <[email protected]> 2015-02-19 14:06:42 +0200
commit: 437d401e6398eebc2ecd061650d16d1ad2d947f1 (patch)
tree: 2fc6cec461f640a1a5434caface4b7b30d54da7e
parent: 509f58740c721e1a8cc8ec4e556af3ed993f927e (diff)
i965/gen8: Fix F32TO16 in vec4 mode if the source and destination registers alias.
We cannot zero out the destination register if it overlaps with the source. Use an Align1 instruction instead to zero out the high 16 bits after the conversion to half float.

Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 36
1 files changed, 27 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 308b305458e..d9e01fdd9bd 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1148,7 +1148,15 @@ brw_inst *
brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
{
const struct brw_context *brw = p->brw;
- bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+ const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+ /* The F32TO16 instruction doesn't support 32-bit destination types in
+ * Align1 mode, and neither does the Gen8 implementation in terms of a
+ * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
+ * an undocumented feature.
+ */
+ const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+ brw->gen >= 8);
+ brw_inst *inst;
if (align16) {
assert(dst.type == BRW_REGISTER_TYPE_UD);
@@ -1158,18 +1166,28 @@ brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
dst.type == BRW_REGISTER_TYPE_HF);
}
+ brw_push_insn_state(p);
+
+ if (needs_zero_fill) {
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+ }
+
if (brw->gen >= 8) {
- if (align16) {
- /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
- * emit_pack_half_2x16 method.)
- */
- brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
- }
- return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+ inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
} else {
assert(brw->gen == 7);
- return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+ inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+ }
+
+ if (needs_zero_fill) {
+ brw_inst_set_no_dd_clear(brw, inst, true);
+ inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+ brw_inst_set_no_dd_check(brw, inst, true);
}
+
+ brw_pop_insn_state(p);
+ return inst;
}
brw_inst *