glsl: Improve precision of mod(x,y)

Currently, Mesa uses the lowering pass MOD_TO_FRACT to implement mod(x,y) as y * fract(x/y). This implementation has a down side though: it introduces precision errors due to the fract() operation. Even worse, since the result of fract() is multiplied by y, the larger y gets the larger the precision error we produce, so for large enough numbers the precision loss is significant. Some examples on i965: Operation Precision error ----------------------------------------------------- mod(-1.951171875, 1.9980468750) 0.0000000447 mod(121.57, 13.29) 0.0000023842 mod(3769.12, 321.99) 0.0000762939 mod(3769.12, 1321.99) 0.0001220703 mod(-987654.125, 123456.984375) 0.0160663128 mod( 987654.125, 123456.984375) 0.0312500000 This patch replaces the current lowering pass with a different one (MOD_TO_FLOOR) that follows the recommended implementation in the GLSL man pages: mod(x,y) = x - y * floor(x/y) This implementation eliminates the precision errors at the expense of an additional add instruction on some systems. On systems that can do negate with multiply-add in a single operation this new implementation would come at no additional cost. v2 (Ian Romanick) - Do not clone operands because when they are expressions we would be duplicating them and that can lead to suboptimal code. Fixes the following 16 dEQP tests: dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.mediump_* dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.highp_* Reviewed-by: Ian Romanick <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2015-01-20 17:09:59 +0100
committer: Iago Toral Quiroga <[email protected]> 2015-02-03 13:19:36 +0100
commit: 5dfb085ff325df3dbefda515f06106469babbefc (patch)
tree: acf8e618b2e412ca3846a98d96e9484841ae4496 /src/glsl
parent: c27d23f0c8e952d8ec340c4c9cd3ad026cd6187b (diff)
3 files changed, 41 insertions, 28 deletions
diff --git a/src/glsl/README b/src/glsl/README
index 2f93f12ffa8..bfcf69f903a 100644
--- a/src/glsl/README
+++ b/src/glsl/README
@@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type:
 
 You can then use the new expression from builtins (if all backends
 would rather see it), or scan the IR and convert to use your new
-expression type (see ir_mod_to_fract, for example).
+expression type (see ir_mod_to_floor, for example).
 
 Q: How is memory management handled in the compiler?
 
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 34e0b4b947d..912d9103e17 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -34,7 +34,7 @@
 #define EXP_TO_EXP2        0x04
 #define POW_TO_EXP2        0x08
 #define LOG_TO_LOG2        0x10
-#define MOD_TO_FRACT       0x20
+#define MOD_TO_FLOOR       0x20
 #define INT_DIV_TO_MUL_RCP 0x40
 #define BITFIELD_INSERT_TO_BFM_BFI 0x80
 #define LDEXP_TO_ARITH     0x100
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 684285350d0..09afe556454 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -36,7 +36,7 @@
  * - EXP_TO_EXP2
  * - POW_TO_EXP2
  * - LOG_TO_LOG2
- * - MOD_TO_FRACT
+ * - MOD_TO_FLOOR
  * - LDEXP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
  * - CARRY_TO_ARITH
@@ -77,14 +77,17 @@
  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  * x**y to 2**(y * log2(x)).
  *
- * MOD_TO_FRACT:
+ * MOD_TO_FLOOR:
  * -------------
- * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
  *
  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  * if we have to break it down like this anyway, it gives an
  * opportunity to do things like constant fold the (1.0 / op1) easily.
  *
+ * Note: before we used to implement this as op1 * fract(op / op1) but this
+ * implementation had significant precision errors.
+ *
  * LDEXP_TO_ARITH:
  * -------------
  * Converts ir_binop_ldexp to arithmetic and bit operations.
@@ -136,7 +139,7 @@ private:
    void sub_to_add_neg(ir_expression *);
    void div_to_mul_rcp(ir_expression *);
    void int_div_to_mul_rcp(ir_expression *);
-   void mod_to_fract(ir_expression *);
+   void mod_to_floor(ir_expression *);
    void exp_to_exp2(ir_expression *);
    void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
@@ -276,22 +279,29 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
 }
 
 void
-lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+lower_instructions_visitor::mod_to_floor(ir_expression *ir)
 {
-   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
-					   ir_var_temporary);
-   this->base_ir->insert_before(temp);
-
-   ir_assignment *const assign =
-      new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
-			    ir->operands[1], NULL);
-
-   this->base_ir->insert_before(assign);
+   ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
+                                         ir_var_temporary);
+   ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
+                                         ir_var_temporary);
+   this->base_ir->insert_before(x);
+   this->base_ir->insert_before(y);
+
+   ir_assignment *const assign_x =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
+                            ir->operands[0], NULL);
+   ir_assignment *const assign_y =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
+                            ir->operands[1], NULL);
+
+   this->base_ir->insert_before(assign_x);
+   this->base_ir->insert_before(assign_y);
 
    ir_expression *const div_expr =
-      new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
-			    ir->operands[0],
-			    new(ir) ir_dereference_variable(temp));
+      new(ir) ir_expression(ir_binop_div, x->type,
+                            new(ir) ir_dereference_variable(x),
+                            new(ir) ir_dereference_variable(y));
 
    /* Don't generate new IR that would need to be lowered in an additional
     * pass.
@@ -299,14 +309,17 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
    if (lowering(DIV_TO_MUL_RCP))
       div_to_mul_rcp(div_expr);
 
-   ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
-					   ir->operands[0]->type,
-					   div_expr,
-					   NULL);
+   ir_expression *const floor_expr =
+      new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 
-   ir->operation = ir_binop_mul;
-   ir->operands[0] = new(ir) ir_dereference_variable(temp);
-   ir->operands[1] = expr;
+   ir_expression *const mul_expr =
+      new(ir) ir_expression(ir_binop_mul,
+                            new(ir) ir_dereference_variable(y),
+                            floor_expr);
+
+   ir->operation = ir_binop_sub;
+   ir->operands[0] = new(ir) ir_dereference_variable(x);
+   ir->operands[1] = mul_expr;
    this->progress = true;
 }
 
@@ -535,8 +548,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_mod:
-      if (lowering(MOD_TO_FRACT) && ir->type->is_float())
-	 mod_to_fract(ir);
+      if (lowering(MOD_TO_FLOOR) && ir->type->is_float())
+	 mod_to_floor(ir);
       break;
 
    case ir_binop_pow:
author	Iago Toral Quiroga <[email protected]>	2015-01-20 17:09:59 +0100
committer	Iago Toral Quiroga <[email protected]>	2015-02-03 13:19:36 +0100
commit	5dfb085ff325df3dbefda515f06106469babbefc (patch)
tree	acf8e618b2e412ca3846a98d96e9484841ae4496 /src/glsl
parent	c27d23f0c8e952d8ec340c4c9cd3ad026cd6187b (diff)