From 344283de5d3f4e2bfa10455f6b974cf731184b55 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 11 May 2011 02:18:24 -0700 Subject: i965: Fix RNDZ and RNDE on Sandybridge and Ivybridge. On gen4/5, the RNDZ and RNDE instructions return floor(x), but set special "round increment bits" in the flag register; a predicated ADD (+1) fixes the result. The documentation still lists '.r' as existing, and says that the predicated add is necessary, but it apparently lies. According to the simulator, BRW_CONDITIONAL_R (7) is not a valid conditional modifier and the RNDZ and RNDE instructions simply produce the correct value. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 19346b5917f..007f58c341c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -784,6 +784,8 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ * stores a rounded value (possibly the wrong way) in the dest register, but * also sets a per-channel "increment bit" in the flag register. A predicated * add of 1.0 fixes dest to contain the desired result. + * + * Sandybridge and later appear to round correctly without an ADD. */ #define ROUND(OP) \ void brw_##OP(struct brw_compile *p, \ @@ -794,10 +796,13 @@ void brw_##OP(struct brw_compile *p, \ rnd = next_insn(p, BRW_OPCODE_##OP); \ brw_set_dest(p, rnd, dest); \ brw_set_src0(p, rnd, src); \ - rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + if (p->brw->intel.gen < 6) { \ + /* turn on round-increments */ \ + rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + } \ } -- cgit v1.2.3