From 520dd5c77972cad13d1a97e228903b3c5bdc384f Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Wed, 16 Aug 2017 02:05:10 -0700 Subject: Make the bsinc table layout more efficient The old layout separated filters, scale deltas, phase deltas, and scale phase deltas into separate segments that each contained a number of scale and phase entries. Since processing a sample needed a filter and one of each delta entry relating to a particular scale and phase, the memory needed would be spread across the whole table. And since subsequent samples would use a different phase, it would jump around the table a whole lot as well. The new layout packs the data in a way more consistent with its use. The filters, scale deltas, phase deltas, and scale phase deltas are interleaved, such that for a particular scale and phase, the filter and delta entries used are contiguous. And the phase entries for a particular scale are kept together, so the ~500 to ~1000 samples processed per source update stay within the same 3KB to 6KB area of the 70+KB table, which is much more cache friendly. 
--- utils/bsincgen.c | 119 ++++++++++--------------------------------------------- 1 file changed, 20 insertions(+), 99 deletions(-) (limited to 'utils') diff --git a/utils/bsincgen.c b/utils/bsincgen.c index fcc5ec85..82235a63 100644 --- a/utils/bsincgen.c +++ b/utils/bsincgen.c @@ -129,10 +129,10 @@ static double CalcKaiserBeta(const double rejection) /* Generates the coefficient, delta, and index tables required by the bsinc resampler */ static void BsiGenerateTables() { - static double filter[BSINC_SCALE_COUNT][BSINC_PHASE_COUNT + 1][2 * BSINC_POINTS_MIN]; - static double scDeltas[BSINC_SCALE_COUNT - 1][BSINC_PHASE_COUNT][2 * BSINC_POINTS_MIN]; + static double filter[BSINC_SCALE_COUNT][BSINC_PHASE_COUNT + 1][2 * BSINC_POINTS_MIN]; + static double scDeltas[BSINC_SCALE_COUNT][BSINC_PHASE_COUNT ][2 * BSINC_POINTS_MIN]; static double phDeltas[BSINC_SCALE_COUNT][BSINC_PHASE_COUNT + 1][2 * BSINC_POINTS_MIN]; - static double spDeltas[BSINC_SCALE_COUNT - 1][BSINC_PHASE_COUNT][2 * BSINC_POINTS_MIN]; + static double spDeltas[BSINC_SCALE_COUNT][BSINC_PHASE_COUNT ][2 * BSINC_POINTS_MIN]; static int mt[BSINC_SCALE_COUNT]; static double at[BSINC_SCALE_COUNT]; double width, beta, scaleBase, scaleRange; @@ -237,13 +237,9 @@ static void BsiGenerateTables() } // Calculate the table size. - i = mt[0]; - for(si = 1; si < BSINC_SCALE_COUNT; si++) - i += BSINC_PHASE_COUNT * mt[si]; - for(si = 0; si < (BSINC_SCALE_COUNT - 1); si++) - i += 2 * BSINC_PHASE_COUNT * mt[si]; - for(si = 1; si < BSINC_SCALE_COUNT; si++) - i += BSINC_PHASE_COUNT * mt[si]; + i = 0; + for(si = 0; si < BSINC_SCALE_COUNT; si++) + i += 4 * BSINC_PHASE_COUNT * mt[si]; fprintf(stdout, "/* Generated by bsincgen, do not edit! */\n\n" "/* Table of windowed sinc coefficients and deltas. 
This 11th order filter\n" @@ -257,78 +253,28 @@ static void BsiGenerateTables() " alignas(16) const float Tab[%d];\n" " const float scaleBase, scaleRange;\n" " const int m[BSINC_SCALE_COUNT];\n" -" const int to[4][BSINC_SCALE_COUNT];\n" -" const int tm[2][BSINC_SCALE_COUNT];\n" +" const int filterOffset[BSINC_SCALE_COUNT];\n" "} bsinc = {\n", i); fprintf(stdout, " /* Tab */ {\n"); - /* Only output enough coefficients for the first (cut) scale as needed to - perform interpolation without extra branching. - */ - fprintf(stdout, " /* %2d,%2d */", mt[0], 0); - for(i = 0; i < mt[0]; i++) - fprintf(stdout, " %+14.9ef,", filter[0][0][i]); - fprintf(stdout, "\n\n"); - - fprintf(stdout, " /* Filters */\n"); - for(si = 1; si < BSINC_SCALE_COUNT; si++) + for(si = 0; si < BSINC_SCALE_COUNT; si++) { const int m = mt[si]; const int o = BSINC_POINTS_MIN - (m / 2); for(pi = 0; pi < BSINC_PHASE_COUNT; pi++) { - fprintf(stdout, " /* %2d,%2d */", m, pi); + fprintf(stdout, " /* %2d,%2d (%d) */", si, pi, m); + fprintf(stdout, "\n "); for(i = 0; i < m; i++) fprintf(stdout, " %+14.9ef,", filter[si][pi][o + i]); - fprintf(stdout, "\n"); - } - } - fprintf(stdout, "\n"); - - // There are N-1 scale deltas for N scales. - fprintf(stdout, " /* Scale deltas */\n"); - for(si = 0; si < (BSINC_SCALE_COUNT - 1); si++) - { - const int m = mt[si]; - const int o = BSINC_POINTS_MIN - (m / 2); - - for(pi = 0; pi < BSINC_PHASE_COUNT; pi++) - { - fprintf(stdout, " /* %2d,%2d */", m, pi); + fprintf(stdout, "\n "); for(i = 0; i < m; i++) fprintf(stdout, " %+14.9ef,", scDeltas[si][pi][o + i]); - fprintf(stdout, "\n"); - } - } - fprintf(stdout, "\n"); - - // Exclude phases for the first (cut) scale. 
- fprintf(stdout, " /* Phase deltas */\n"); - for(si = 1; si < BSINC_SCALE_COUNT; si++) - { - const int m = mt[si]; - const int o = BSINC_POINTS_MIN - (m / 2); - - for(pi = 0; pi < BSINC_PHASE_COUNT; pi++) - { - fprintf(stdout, " /* %2d,%2d */", m, pi); + fprintf(stdout, "\n "); for(i = 0; i < m; i++) fprintf(stdout, " %+14.9ef,", phDeltas[si][pi][o + i]); - fprintf(stdout, "\n"); - } - } - fprintf(stdout, "\n"); - - fprintf(stdout, " /* Scale phase deltas */\n"); - for(si = 0; si < (BSINC_SCALE_COUNT - 1); si++) - { - const int m = mt[si]; - const int o = BSINC_POINTS_MIN - (m / 2); - - for(pi = 0; pi < BSINC_PHASE_COUNT; pi++) - { - fprintf(stdout, " /* %2d,%2d */", m, pi); + fprintf(stdout, "\n "); for(i = 0; i < m; i++) fprintf(stdout, " %+14.9ef,", spDeltas[si][pi][o + i]); fprintf(stdout, "\n"); @@ -342,48 +288,23 @@ static void BsiGenerateTables() base-2 logarithm of its inverse: log_2(1 / scaleBase) */ fprintf(stdout, " /* scaleBase */ %.9ef, /* scaleRange */ %.9ef,\n", scaleBase, 1.0 / scaleRange); - fprintf(stdout, " /* m */ {"); + fprintf(stdout, " /* m */ {"); fprintf(stdout, " %d", mt[0]); for(si = 1; si < BSINC_SCALE_COUNT; si++) fprintf(stdout, ", %d", mt[si]); - fprintf(stdout, " },\n"); - fprintf(stdout, " /* to */ {\n { %5d", 0); - i = mt[0]; - for(si = 1; si < BSINC_SCALE_COUNT; si++) - { - fprintf(stdout, ", %5d", i); - i += BSINC_PHASE_COUNT * mt[si]; - } - fprintf(stdout, " },\n {"); - for(si = 0; si < (BSINC_SCALE_COUNT - 1); si++) - { - fprintf(stdout, " %5d,", i); - i += BSINC_PHASE_COUNT * mt[si]; - } - fprintf(stdout, " %5d },\n { %5d", 0, 0); + fprintf(stdout, " /* filterOffset */ {"); + fprintf(stdout, " %d", 0); + i = mt[0]*4*BSINC_PHASE_COUNT; for(si = 1; si < BSINC_SCALE_COUNT; si++) { - fprintf(stdout, ", %5d", i); - i += BSINC_PHASE_COUNT * mt[si]; - } - fprintf (stdout, " },\n {"); - for(si = 0; si < (BSINC_SCALE_COUNT - 1); si++) - { - fprintf(stdout, " %5d,", i); - i += BSINC_PHASE_COUNT * mt[si]; + fprintf(stdout, ", %d", i); + 
i += mt[si]*4*BSINC_PHASE_COUNT; } - fprintf(stdout, " %5d }\n },\n", 0); - fprintf(stdout, " /* tm */ {\n { 0"); - for(si = 1; si < BSINC_SCALE_COUNT; si++) - fprintf(stdout, ", %d", mt[si]); - fprintf(stdout, " },\n {"); - for(si = 0; si < (BSINC_SCALE_COUNT - 1); si++) - fprintf(stdout, " %d,", mt[si]); - fprintf(stdout, " 0 }\n }\n};\n\n"); + fprintf(stdout, " }\n};\n\n"); } -- cgit v1.2.3