Generate the bsinc tables using constexpr methods

All the methods used should be compliant with C++14 constexpr rules. However, the number of scales and phases cause GenerateBSincCoeffs to reach the allowed step limit, preventing full compile-time generation. It's not a terribly big deal, it'll generate them very quickly when loading, but it does prevent using shared read-only memory pages.
author: Chris Robinson <[email protected]> 2020-04-01 23:17:46 -0700
committer: Chris Robinson <[email protected]> 2020-04-02 00:49:19 -0700
commit: 8853519d896f11b678873f0afacac16b3cdebc27 (patch)
tree: 319fd5c5a5978e7790bbd22844c9e218b2bbe10e
parent: 6fb59f1182965a0e1e45d60e81e3ff02db45944c (diff)
9 files changed, 380 insertions, 12 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2255baeb..3ce37e84 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -545,6 +545,9 @@ SET(COMMON_OBJS
     common/alstring.cpp
     common/alstring.h
     common/atomic.h
+    common/bsinc_defs.h
+    common/bsinc_tables.cpp
+    common/bsinc_tables.h
     common/dynload.cpp
     common/dynload.h
     common/endiantest.h
diff --git a/alc/alu.cpp b/alc/alu.cpp
index df9af3fe..18973942 100644
--- a/alc/alu.cpp
+++ b/alc/alu.cpp
@@ -80,10 +80,10 @@
 #include "vecmat.h"
 #include "voice.h"
 
-#include "bsinc_inc.h"
+#include "bsinc_tables.h"
 
 
-static_assert(!(MAX_RESAMPLER_PADDING&1) && MAX_RESAMPLER_PADDING >= bsinc24.m[0],
+static_assert(!(MAX_RESAMPLER_PADDING&1) && MAX_RESAMPLER_PADDING >= BSINC_POINTS_MAX,
     "MAX_RESAMPLER_PADDING is not a multiple of two, or is too small");
 
 
diff --git a/alc/mixer/mixer_c.cpp b/alc/mixer/mixer_c.cpp
index c086dd8d..6b68d821 100644
--- a/alc/mixer/mixer_c.cpp
+++ b/alc/mixer/mixer_c.cpp
@@ -6,7 +6,7 @@
 
 #include "alcmain.h"
 #include "alu.h"
-
+#include "bsinc_defs.h"
 #include "defs.h"
 #include "hrtfbase.h"
 
diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp
index afc9768a..2cdf21c3 100644
--- a/alc/mixer/mixer_neon.cpp
+++ b/alc/mixer/mixer_neon.cpp
@@ -10,6 +10,7 @@
 #include "alu.h"
 #include "hrtf.h"
 #include "defs.h"
+#include "bsinc_defs.h"
 #include "hrtfbase.h"
 
 
diff --git a/alc/mixer/mixer_sse.cpp b/alc/mixer/mixer_sse.cpp
index 3bc7b30f..32345522 100644
--- a/alc/mixer/mixer_sse.cpp
+++ b/alc/mixer/mixer_sse.cpp
@@ -9,6 +9,7 @@
 #include "alcmain.h"
 
 #include "alu.h"
+#include "bsinc_defs.h"
 #include "defs.h"
 #include "hrtfbase.h"
 
diff --git a/alc/voice.h b/alc/voice.h
index 7adffe7f..f4d09ad3 100644
--- a/alc/voice.h
+++ b/alc/voice.h
@@ -44,16 +44,9 @@ enum class Resampler {
 };
 extern Resampler ResamplerDefault;
 
-/* The number of distinct scale and phase intervals within the bsinc filter
- * table.
- */
-#define BSINC_SCALE_BITS  4
-#define BSINC_SCALE_COUNT (1<<BSINC_SCALE_BITS)
-#define BSINC_PHASE_BITS  5
-#define BSINC_PHASE_COUNT (1<<BSINC_PHASE_BITS)
 
-/* Interpolator state.  Kind of a misnomer since the interpolator itself is
- * stateless.  This just keeps it from having to recompute scale-related
+/* Interpolator state. Kind of a misnomer since the interpolator itself is
+ * stateless. This just keeps it from having to recompute scale-related
  * mappings for every sample.
  */
 struct BsincState {
diff --git a/common/bsinc_defs.h b/common/bsinc_defs.h
new file mode 100644
index 00000000..30d1219e
--- /dev/null
+++ b/common/bsinc_defs.h
@@ -0,0 +1,13 @@
+#ifndef BSINC_DEFS_H
+#define BSINC_DEFS_H
+
+/* The number of distinct scale and phase intervals within the filter table. */
+#define BSINC_SCALE_BITS  4
+#define BSINC_SCALE_COUNT (1<<BSINC_SCALE_BITS)
+#define BSINC_PHASE_BITS  5
+#define BSINC_PHASE_COUNT (1<<BSINC_PHASE_BITS)
+
+/* The maximum number of sample points for the bsinc filters. */
+#define BSINC_POINTS_MAX 48
+
+#endif /* BSINC_DEFS_H */
diff --git a/common/bsinc_tables.cpp b/common/bsinc_tables.cpp
new file mode 100644
index 00000000..99033a0b
--- /dev/null
+++ b/common/bsinc_tables.cpp
@@ -0,0 +1,340 @@
+
+#include "bsinc_tables.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+#include "math_defs.h"
+#include "vector.h"
+
+
+namespace {
+
+/* The max points includes the doubling for downsampling, so the maximum number
+ * of base sample points is 24, which is 23rd order.
+ */
+constexpr int BSincPointsMax{BSINC_POINTS_MAX};
+constexpr int BSincPointsHalf{BSincPointsMax / 2};
+
+constexpr int BSincPhaseCount{BSINC_PHASE_COUNT};
+constexpr int BSincScaleCount{BSINC_SCALE_COUNT};
+
+
+template<typename T>
+constexpr std::enable_if_t<std::is_floating_point<T>::value,T> sqrt(T x)
+{
+    if(!(x >= 0 && x < std::numeric_limits<double>::infinity()))
+        throw std::domain_error{"Invalid sqrt value"};
+
+    T cur{x}, prev{0};
+    while(cur != prev)
+    {
+        prev = cur;
+        cur = 0.5f*(cur + x/cur);
+    }
+    return cur;
+}
+
+template<typename T>
+constexpr std::enable_if_t<std::is_floating_point<T>::value,T> sin(T x)
+{
+    if(x >= al::MathDefs<T>::Tau())
+    {
+        if(!(x < 65536))
+            throw std::domain_error{"Invalid sin value"};
+        do {
+            x -= al::MathDefs<T>::Tau();
+        } while(x >= al::MathDefs<T>::Tau());
+    }
+    else if(x < 0)
+    {
+        if(!(x > -65536))
+            throw std::domain_error{"Invalid sin value"};
+        do {
+            x += al::MathDefs<T>::Tau();
+        } while(x < 0);
+    }
+
+    T prev{x}, n{6};
+    int i{4}, s{-1};
+    const T xx{x*x};
+    T t{xx*x};
+
+    T cur{prev + t*s/n};
+    while(prev != cur)
+    {
+        prev = cur;
+        n *= i*(i+1);
+        i += 2;
+        s = -s;
+        t *= xx;
+
+        cur += t*s/n;
+    }
+    return cur;
+}
+
+
+/* This is the normalized cardinal sine (sinc) function.
+ *
+ *   sinc(x) = { 1,                   x = 0
+ *             { sin(pi x) / (pi x),  otherwise.
+ */
+constexpr double Sinc(const double x)
+{
+    if(std::abs(x) < 1e-15)
+        return 1.0;
+    return sin(al::MathDefs<double>::Pi()*x) / (al::MathDefs<double>::Pi()*x);
+}
+
+/* The zero-order modified Bessel function of the first kind, used for the
+ * Kaiser window.
+ *
+ *   I_0(x) = sum_{k=0}^inf (1 / k!)^2 (x / 2)^(2 k)
+ *          = sum_{k=0}^inf ((x / 2)^k / k!)^2
+ */
+constexpr double BesselI_0(const double x)
+{
+    /* Start at k=1 since k=0 is trivial. */
+    const double x2{x / 2.0};
+    double term{1.0};
+    double sum{1.0};
+    double last_sum{};
+    int k{1};
+
+    /* Let the integration converge until the term of the sum is no longer
+     * significant.
+     */
+    do {
+        const double y{x2 / k};
+        ++k;
+        last_sum = sum;
+        term *= y * y;
+        sum += term;
+    } while(sum != last_sum);
+
+    return sum;
+}
+
+/* Calculate a Kaiser window from the given beta value and a normalized k
+ * [-1, 1].
+ *
+ *   w(k) = { I_0(B sqrt(1 - k^2)) / I_0(B),  -1 <= k <= 1
+ *          { 0,                              elsewhere.
+ *
+ * Where k can be calculated as:
+ *
+ *   k = i / l,         where -l <= i <= l.
+ *
+ * or:
+ *
+ *   k = 2 i / M - 1,   where 0 <= i <= M.
+ */
+constexpr double Kaiser(const double beta, const double k, const double besseli_0_beta)
+{
+    if(!(k >= -1.0 && k <= 1.0))
+        return 0.0;
+    return BesselI_0(beta * sqrt(1.0 - k*k)) / besseli_0_beta;
+}
+
+/* Calculates the (normalized frequency) transition width of the Kaiser window.
+ * Rejection is in dB.
+ */
+constexpr double CalcKaiserWidth(const double rejection, const int order)
+{
+    if(rejection > 21.19)
+       return (rejection - 7.95) / (order * 2.285 * al::MathDefs<double>::Tau());
+    /* This enforces a minimum rejection of just above 21.18dB */
+    return 5.79 / (order * al::MathDefs<double>::Tau());
+}
+
+/* Calculates the beta value of the Kaiser window. Rejection is in dB. */
+constexpr double CalcKaiserBeta(const double rejection)
+{
+    if(rejection > 50.0)
+       return 0.1102 * (rejection-8.7);
+    else if(rejection >= 21.0)
+       return (0.5842 * std::pow(rejection-21.0, 0.4)) + (0.07886 * (rejection-21.0));
+    return 0.0;
+}
+
+
+struct BSincHeader {
+    double width;
+    double beta;
+    double scaleBase;
+    double scaleRange;
+    double besseli_0_beta;
+
+    int a[BSINC_SCALE_COUNT];
+    int total_size;
+};
+
+constexpr BSincHeader GenerateBSincHeader(int Rejection, int Order)
+{
+    BSincHeader ret{};
+    ret.width = CalcKaiserWidth(Rejection, Order);
+    ret.beta = CalcKaiserBeta(Rejection);
+    ret.scaleBase = ret.width / 2.0;
+    ret.scaleRange = 1.0 - ret.scaleBase;
+    ret.besseli_0_beta = BesselI_0(ret.beta);
+
+    int num_points{Order+1};
+    for(int si{0};si < BSincScaleCount;++si)
+    {
+        const double scale{ret.scaleBase + (ret.scaleRange * si / (BSincScaleCount - 1))};
+        const int a{std::min(static_cast<int>(num_points / 2.0 / scale), num_points)};
+        const int m{2 * a};
+
+        ret.a[si] = a;
+        ret.total_size += 4 * BSincPhaseCount * ((m+3) & ~3);
+    }
+
+    return ret;
+}
+
+/* 11th and 23rd order filters (12 and 24-point respectively) with a 60dB drop
+ * at nyquist. Each filter will scale up the order when downsampling, to 23 and
+ * 47th order respectively.
+ */
+constexpr BSincHeader bsinc12_hdr{GenerateBSincHeader(60, 11)};
+constexpr BSincHeader bsinc24_hdr{GenerateBSincHeader(60, 23)};
+
+
+/* std::array is missing constexpr for several methods. */
+template<typename T, size_t N>
+struct Array {
+    T data[N];
+};
+
+template<size_t total_size>
+constexpr auto GenerateBSincCoeffs(const BSincHeader hdr)
+{
+    double   filter[BSincScaleCount][BSincPhaseCount+1][BSincPointsMax]{};
+    double phDeltas[BSincScaleCount][BSincPhaseCount  ][BSincPointsMax]{};
+    double scDeltas[BSincScaleCount][BSincPhaseCount  ][BSincPointsMax]{};
+    double spDeltas[BSincScaleCount][BSincPhaseCount  ][BSincPointsMax]{};
+
+    /* Calculate the Kaiser-windowed Sinc filter coefficients for each scale
+     * and phase.
+     */
+    for(unsigned int si{0};si < BSincScaleCount;++si)
+    {
+        const int m{hdr.a[si] * 2};
+        const int o{BSincPointsHalf - (m/2)};
+        const int l{hdr.a[si] - 1};
+        const int a{hdr.a[si]};
+        const double scale{hdr.scaleBase + (hdr.scaleRange * si / (BSincScaleCount - 1))};
+        const double cutoff{scale - (hdr.scaleBase * std::max(0.5, scale) * 2.0)};
+
+        for(int pi{0};pi <= BSincPhaseCount;++pi)
+        {
+            const double phase{l + (pi/double{BSincPhaseCount})};
+
+            for(int i{0};i < m;++i)
+            {
+                const double x{i - phase};
+                filter[si][pi][o + i] = Kaiser(hdr.beta, x/a, hdr.besseli_0_beta) * cutoff *
+                    Sinc(cutoff*x);
+            }
+        }
+    }
+
+    /* Linear interpolation between phases is simplified by pre-calculating the
+     * delta (b - a) in: x = a + f (b - a)
+     */
+    for(unsigned int si{0};si < BSincScaleCount;++si)
+    {
+        const int m{hdr.a[si] * 2};
+        const int o{BSincPointsHalf - (m/2)};
+
+        for(int pi{0};pi < BSincPhaseCount;++pi)
+        {
+            for(int i{0};i < m;++i)
+                phDeltas[si][pi][o + i] = filter[si][pi + 1][o + i] - filter[si][pi][o + i];
+        }
+    }
+
+    /* Linear interpolation between scales is also simplified.
+     *
+     * Given a difference in points between scales, the destination points will
+     * be 0, thus: x = a + f (-a)
+     */
+    for(unsigned int si{0};si < (BSincScaleCount-1);++si)
+    {
+        const int m{hdr.a[si] * 2};
+        const int o{BSincPointsHalf - (m/2)};
+
+        for(int pi{0};pi < BSincPhaseCount;++pi)
+        {
+            for(int i{0};i < m;++i)
+                scDeltas[si][pi][o + i] = filter[si + 1][pi][o + i] - filter[si][pi][o + i];
+        }
+    }
+
+    /* This last simplification is done to complete the bilinear equation for
+     * the combination of phase and scale.
+     */
+    for(unsigned int si{0};si < (BSincScaleCount-1);++si)
+    {
+        const int m{hdr.a[si] * 2};
+        const int o{BSincPointsHalf - (m/2)};
+
+        for(int pi{0};pi < BSincPhaseCount;++pi)
+        {
+            for(int i{0};i < m;++i)
+                spDeltas[si][pi][o + i] = phDeltas[si + 1][pi][o + i] - phDeltas[si][pi][o + i];
+        }
+    }
+
+    Array<float,total_size> ret{};
+    size_t idx{0};
+
+    for(unsigned int si{0};si < BSincScaleCount;++si)
+    {
+        const int m{((hdr.a[si]*2) + 3) & ~3};
+        const int o{BSincPointsHalf - (m/2)};
+
+        for(int pi{0};pi < BSincPhaseCount;++pi)
+        {
+            for(int i{0};i < m;++i)
+                ret.data[idx++] = static_cast<float>(filter[si][pi][o + i]);
+            for(int i{0};i < m;++i)
+                ret.data[idx++] = static_cast<float>(phDeltas[si][pi][o + i]);
+            for(int i{0};i < m;++i)
+                ret.data[idx++] = static_cast<float>(scDeltas[si][pi][o + i]);
+            for(int i{0};i < m;++i)
+                ret.data[idx++] = static_cast<float>(spDeltas[si][pi][o + i]);
+        }
+    }
+    assert(idx == total_size);
+
+    return ret;
+}
+
+/* FIXME: These can't be constexpr due to reaching the step limit. */
+const auto bsinc12_table = GenerateBSincCoeffs<bsinc12_hdr.total_size>(bsinc12_hdr);
+const auto bsinc24_table = GenerateBSincCoeffs<bsinc24_hdr.total_size>(bsinc24_hdr);
+
+
+constexpr BSincTable GenerateBSincTable(const BSincHeader hdr, const float *tab)
+{
+    BSincTable ret{};
+    ret.scaleBase = static_cast<float>(hdr.scaleBase);
+    ret.scaleRange = static_cast<float>(1.0 / hdr.scaleRange);
+    for(int i{0};i < BSincScaleCount;++i)
+        ret.m[i] = static_cast<unsigned int>(((hdr.a[i]*2) + 3) & ~3);
+    ret.filterOffset[0] = 0;
+    for(int i{1};i < BSincScaleCount;++i)
+        ret.filterOffset[i] = ret.filterOffset[i-1] + ret.m[i-1];
+    ret.Tab = tab;
+    return ret;
+}
+
+} // namespace
+
+const BSincTable bsinc12{GenerateBSincTable(bsinc12_hdr, bsinc12_table.data)};
+const BSincTable bsinc24{GenerateBSincTable(bsinc24_hdr, bsinc24_table.data)};
diff --git a/common/bsinc_tables.h b/common/bsinc_tables.h
new file mode 100644
index 00000000..2e2443e5
--- /dev/null
+++ b/common/bsinc_tables.h
@@ -0,0 +1,17 @@
+#ifndef BSINC_TABLES_H
+#define BSINC_TABLES_H
+
+#include "bsinc_defs.h"
+
+
+struct BSincTable {
+    float scaleBase, scaleRange;
+    unsigned int m[BSINC_SCALE_COUNT];
+    unsigned int filterOffset[BSINC_SCALE_COUNT];
+    const float *Tab;
+};
+
+extern const BSincTable bsinc12;
+extern const BSincTable bsinc24;
+
+#endif /* BSINC_TABLES_H */
author	Chris Robinson <[email protected]>	2020-04-01 23:17:46 -0700
committer	Chris Robinson <[email protected]>	2020-04-02 00:49:19 -0700
commit	8853519d896f11b678873f0afacac16b3cdebc27 (patch)
tree	319fd5c5a5978e7790bbd22844c9e218b2bbe10e
parent	6fb59f1182965a0e1e45d60e81e3ff02db45944c (diff)