about | summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2018-09-21 14:11:12 -0700
committer: Eric Anholt <[email protected]> 2018-09-21 17:16:43 -0700
commit: 10d5d2d527dea11f4afe300eebeaba077f169af0 (patch)
tree: 5fa4fa921aeb02dc4e7b903eea96bc4539dd6e6f /src/gallium/drivers
parent: a0baedb6387aea78ffb0eb654cb837421e15d9fe (diff)
vc4: Fix sin(0.0) and cos(0.0) accuracy to fix SDL rendering rotation.

SDL has some shaders that compute sin(angle) and cos(angle) for a rotation
matrix in the VS, and angle is usually 0.0. Our previous implementation
had quite a bit of error around 0.0, causing single-pixel rotations at
typical window sizes.

SDL2 has changed as of August 28th (commit 12156:e5a666405750) to not need
sin/cos in the VS, but we should still fix this for existing
implementations or similar patterns that other programs may have.

glsl-cos goes from 32 instructions to 36, but 9 uniforms to 7.
glsl-sin goes from 32 instructions to 34, but 8 uniforms to 7.

This seems like a fine impact to have for the bugfix.

Cc: 18.1 18.2 <[email protected]>
Fixes: https://github.com/anholt/mesa/issues/110

Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 66
1 files changed, 40 insertions, 26 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 1d767af1bdb..1f46b64005b 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -687,24 +687,44 @@ ntq_fceil(struct vc4_compile *c, struct qreg src)
}
static struct qreg
+ntq_shrink_sincos_input_range(struct vc4_compile *c, struct qreg x)
+{
+ /* Since we're using a Taylor approximation, we want to have a small
+ * number of coefficients and take advantage of sin/cos repeating
+ * every 2pi. We keep our x as close to 0 as we can, since the series
+ * will be less accurate as |x| increases. (Also, be careful of
+ * shifting the input x value to be tricky with sin/cos relations,
+ * because getting accurate values for x==0 is very important for SDL
+ * rendering)
+ */
+ struct qreg scaled_x =
+ qir_FMUL(c, x,
+ qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+ /* Note: FTOI truncates toward 0. */
+ struct qreg x_frac = qir_FSUB(c, scaled_x,
+ qir_ITOF(c, qir_FTOI(c, scaled_x)));
+ /* Map [0.5, 1] to [-0.5, 0] */
+ qir_SF(c, qir_FSUB(c, x_frac, qir_uniform_f(c, 0.5)));
+ qir_FSUB_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NC;
+ /* Map [-1, -0.5] to [0, 0.5] */
+ qir_SF(c, qir_FADD(c, x_frac, qir_uniform_f(c, 0.5)));
+ qir_FADD_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS;
+
+ return x_frac;
+}
+
+static struct qreg
ntq_fsin(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
- -2.0 * M_PI,
- pow(2.0 * M_PI, 3) / (3 * 2 * 1),
- -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
- pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
- -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+ 2.0 * M_PI,
+ -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
+ pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
+ -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
+ pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
- struct qreg scaled_x =
- qir_FMUL(c,
- src,
- qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
-
- struct qreg x = qir_FADD(c,
- ntq_ffract(c, scaled_x),
- qir_uniform_f(c, -0.5));
+ struct qreg x = ntq_shrink_sincos_input_range(c, src);
struct qreg x2 = qir_FMUL(c, x, x);
struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
@@ -722,21 +742,15 @@ static struct qreg
ntq_fcos(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
- -1.0f,
- pow(2.0 * M_PI, 2) / (2 * 1),
- -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
- pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
- -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
- pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+ 1.0f,
+ -pow(2.0 * M_PI, 2) / (2 * 1),
+ pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
+ -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
+ pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+ -pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
- struct qreg scaled_x =
- qir_FMUL(c, src,
- qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
- struct qreg x_frac = qir_FADD(c,
- ntq_ffract(c, scaled_x),
- qir_uniform_f(c, -0.5));
-
+ struct qreg x_frac = ntq_shrink_sincos_input_range(c, src);
struct qreg sum = qir_uniform_f(c, coeff[0]);
struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
struct qreg x = x2; /* Current x^2, x^4, or x^6 */