From 66e613c21d53c454d56ebd079d04d3afb558bff4 Mon Sep 17 00:00:00 2001 From: jstebbins Date: Thu, 28 Apr 2011 20:31:17 +0000 Subject: fix screaching at the beginning of some ac3 to X transcodes a52dec isn't completely thread safe. It has a collection of static data that gets initialized when a52_init is called. This static data gets initialized to the same thing every time, but if one instance of the decoder is using the data when it gets reinitialized, bad(tm) things happen. git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@3966 b64f7644-9d1e-0410-96f1-a4d463321fa5 --- contrib/a52dec/A01-thread-safe.patch | 459 +++++++++++++++++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 contrib/a52dec/A01-thread-safe.patch diff --git a/contrib/a52dec/A01-thread-safe.patch b/contrib/a52dec/A01-thread-safe.patch new file mode 100644 index 000000000..0fa7f0a53 --- /dev/null +++ b/contrib/a52dec/A01-thread-safe.patch @@ -0,0 +1,459 @@ +diff -Naur -x '*.o' -x '*.lo' -x '*.a' -x '*.la' a52dec/liba52/a52_internal.h ../a52dec.new/liba52/a52_internal.h +--- a52dec/liba52/a52_internal.h 2011-04-28 07:54:05.499052327 -0700 ++++ ../a52dec.new/liba52/a52_internal.h 2011-04-28 07:50:30.998845068 -0700 +@@ -32,6 +32,11 @@ + int8_t bap[256]; /* derived channel bit allocation */ + } expbap_t; + ++typedef struct complex_s { ++ sample_t real; ++ sample_t imag; ++} complex_t; ++ + struct a52_state_s { + uint8_t fscod; /* sample rate */ + uint8_t halfrate; /* halfrate factor */ +@@ -85,6 +90,20 @@ + + sample_t * samples; + int downmixed; ++ ++ /* Root values for IFFT */ ++ sample_t * roots16; // size 3 ++ sample_t * roots32; // size 7 ++ sample_t * roots64; // size 15 ++ sample_t * roots128; // size 31 ++ ++ /* Twiddle factors for IMDCT */ ++ complex_t * pre1; // size 128 ++ complex_t * post1; // size 64 ++ complex_t * pre2; // size 64 ++ complex_t * post2; // size 32 ++ ++ sample_t * a52_imdct_window; // size 256 + }; + + #define LEVEL_PLUS6DB 2.0 +@@ -119,6 +138,6 @@ + sample_t clev, sample_t slev); + void a52_upmix (sample_t * samples, int acmod, int output); + +-void a52_imdct_init (uint32_t mm_accel); +-void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); +-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); ++void a52_imdct_init (a52_state_t * state, uint32_t mm_accel); ++void a52_imdct_256 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias); ++void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias); +diff -Naur -x '*.o' -x '*.lo' -x '*.a' -x '*.la' a52dec/liba52/imdct.c ../a52dec.new/liba52/imdct.c +--- a52dec/liba52/imdct.c 2002-07-27 18:52:07.000000000 -0700 ++++ ../a52dec.new/liba52/imdct.c 2011-04-28 07:51:00.398624376 -0700 +@@ -40,11 +40,6 @@ + #include "a52_internal.h" + #include "mm_accel.h" + +-typedef struct complex_s { +- sample_t real; +- sample_t imag; +-} complex_t; +- + static uint8_t fftorder[] = { + 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, + 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, +@@ -56,22 +51,8 @@ + 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 + }; + +-/* Root values for IFFT */ +-static sample_t roots16[3]; +-static sample_t roots32[7]; +-static sample_t roots64[15]; +-static sample_t roots128[31]; +- +-/* Twiddle factors for IMDCT */ +-static complex_t pre1[128]; +-static complex_t post1[64]; +-static complex_t pre2[64]; +-static complex_t post2[32]; +- +-static sample_t a52_imdct_window[256]; +- +-static void (* ifft128) (complex_t * buf); +-static void (* ifft64) (complex_t * buf); ++static void (* ifft128) (a52_state_t * state, complex_t * buf); ++static void (* ifft64) (a52_state_t * state, complex_t * buf); + + static inline void ifft2 (complex_t * buf) + { +@@ -167,7 +148,7 @@ + a1.imag += tmp4; \ + } while (0) + +-static inline void ifft8 (complex_t * buf) ++static inline void ifft8 (a52_state_t * state, complex_t * buf) + { + double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; + +@@ -175,7 +156,7 @@ + ifft2 (buf + 4); + ifft2 (buf + 6); + BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); +- BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); ++ BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], state->roots16[1]); + } + + static void ifft_pass (complex_t * buf, sample_t * weight, int n) +@@ -205,66 +186,66 @@ + } while (--i); + } + +-static void ifft16 (complex_t * buf) ++static void ifft16 (a52_state_t * state, complex_t * buf) + { +- ifft8 (buf); ++ ifft8 (state, buf); + ifft4 (buf + 8); + ifft4 (buf + 12); +- ifft_pass (buf, roots16 - 4, 4); ++ ifft_pass (buf, state->roots16 - 4, 4); + } + +-static void ifft32 (complex_t * buf) ++static void ifft32 (a52_state_t * state, complex_t * buf) + { +- ifft16 (buf); +- ifft8 (buf + 16); +- ifft8 (buf + 24); +- ifft_pass (buf, roots32 - 8, 8); ++ ifft16 (state, buf); ++ ifft8 (state, buf + 16); ++ ifft8 (state, buf + 24); ++ ifft_pass (buf, state->roots32 - 8, 8); + } + +-static void ifft64_c (complex_t * buf) ++static void ifft64_c (a52_state_t * state, complex_t * buf) + { +- ifft32 (buf); +- ifft16 (buf + 32); +- ifft16 (buf + 48); +- ifft_pass (buf, roots64 - 16, 16); ++ ifft32 (state, buf); ++ ifft16 (state, buf + 32); ++ ifft16 (state, buf + 48); ++ ifft_pass (buf, state->roots64 - 16, 16); + } + +-static void ifft128_c (complex_t * buf) ++static void ifft128_c (a52_state_t * state, complex_t * buf) + { +- ifft32 (buf); +- ifft16 (buf + 32); +- ifft16 (buf + 48); +- ifft_pass (buf, roots64 - 16, 16); ++ ifft32 (state, buf); ++ ifft16 (state, buf + 32); ++ ifft16 (state, buf + 48); ++ ifft_pass (buf, state->roots64 - 16, 16); + +- ifft32 (buf + 64); +- ifft32 (buf + 96); +- ifft_pass (buf, roots128 - 32, 32); ++ ifft32 (state, buf + 64); ++ ifft32 (state, buf + 96); ++ ifft_pass (buf, state->roots128 - 32, 32); + } + +-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) ++void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias) + { + int i, k; + sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; +- const sample_t * window = a52_imdct_window; ++ const sample_t * window = state->a52_imdct_window; + complex_t buf[128]; + + for (i = 0; i < 128; i++) { + k = fftorder[i]; +- t_r = pre1[i].real; +- t_i = pre1[i].imag; ++ t_r = state->pre1[i].real; ++ t_i = state->pre1[i].imag; + + buf[i].real = t_i * data[255-k] + t_r * data[k]; + buf[i].imag = t_r * data[255-k] - t_i * data[k]; + } + +- ifft128 (buf); ++ ifft128 (state, buf); + + /* Post IFFT complex multiply plus IFFT complex conjugate*/ + /* Window and convert to real valued signal */ + for (i = 0; i < 64; i++) { + /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ +- t_r = post1[i].real; +- t_i = post1[i].imag; ++ t_r = state->post1[i].real; ++ t_i = state->post1[i].imag; + + a_r = t_r * buf[i].real + t_i * buf[i].imag; + a_i = t_i * buf[i].real - t_r * buf[i].imag; +@@ -285,18 +266,18 @@ + } + } + +-void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) ++void a52_imdct_256(a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias) + { + int i, k; + sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; +- const sample_t * window = a52_imdct_window; ++ const sample_t * window = state->a52_imdct_window; + complex_t buf1[64], buf2[64]; + + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ + for (i = 0; i < 64; i++) { + k = fftorder[i]; +- t_r = pre2[i].real; +- t_i = pre2[i].imag; ++ t_r = state->pre2[i].real; ++ t_i = state->pre2[i].imag; + + buf1[i].real = t_i * data[254-k] + t_r * data[k]; + buf1[i].imag = t_r * data[254-k] - t_i * data[k]; +@@ -305,15 +286,15 @@ + buf2[i].imag = t_r * data[255-k] - t_i * data[k+1]; + } + +- ifft64 (buf1); +- ifft64 (buf2); ++ ifft64 (state, buf1); ++ ifft64 (state, buf2); + + /* Post IFFT complex multiply */ + /* Window and convert to real valued signal */ + for (i = 0; i < 32; i++) { + /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ +- t_r = post2[i].real; +- t_i = post2[i].imag; ++ t_r = state->post2[i].real; ++ t_i = state->post2[i].imag; + + a_r = t_r * buf1[i].real + t_i * buf1[i].imag; + a_i = t_i * buf1[i].real - t_r * buf1[i].imag; +@@ -362,7 +343,7 @@ + return bessel; + } + +-void a52_imdct_init (uint32_t mm_accel) ++void a52_imdct_init (a52_state_t * state, uint32_t mm_accel) + { + int i, k; + double sum; +@@ -371,50 +352,50 @@ + sum = 0; + for (i = 0; i < 256; i++) { + sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); +- a52_imdct_window[i] = sum; ++ state->a52_imdct_window[i] = sum; + } + sum++; + for (i = 0; i < 256; i++) +- a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum); ++ state->a52_imdct_window[i] = sqrt (state->a52_imdct_window[i] / sum); + + for (i = 0; i < 3; i++) +- roots16[i] = cos ((M_PI / 8) * (i + 1)); ++ state->roots16[i] = cos ((M_PI / 8) * (i + 1)); + + for (i = 0; i < 7; i++) +- roots32[i] = cos ((M_PI / 16) * (i + 1)); ++ state->roots32[i] = cos ((M_PI / 16) * (i + 1)); + + for (i = 0; i < 15; i++) +- roots64[i] = cos ((M_PI / 32) * (i + 1)); ++ state->roots64[i] = cos ((M_PI / 32) * (i + 1)); + + for (i = 0; i < 31; i++) +- roots128[i] = cos ((M_PI / 64) * (i + 1)); ++ state->roots128[i] = cos ((M_PI / 64) * (i + 1)); + + for (i = 0; i < 64; i++) { + k = fftorder[i] / 2 + 64; +- pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); +- pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); ++ state->pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); ++ state->pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); + } + + for (i = 64; i < 128; i++) { + k = fftorder[i] / 2 + 64; +- pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); +- pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); ++ state->pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); ++ state->pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); + } + + for (i = 0; i < 64; i++) { +- post1[i].real = cos ((M_PI / 256) * (i + 0.5)); +- post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); ++ state->post1[i].real = cos ((M_PI / 256) * (i + 0.5)); ++ state->post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); + } + + for (i = 0; i < 64; i++) { + k = fftorder[i] / 4; +- pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); +- pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); ++ state->pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); ++ state->pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); + } + + for (i = 0; i < 32; i++) { +- post2[i].real = cos ((M_PI / 128) * (i + 0.5)); +- post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); ++ state->post2[i].real = cos ((M_PI / 128) * (i + 0.5)); ++ state->post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); + } + + #ifdef LIBA52_DJBFFT +diff -Naur -x '*.o' -x '*.lo' -x '*.a' -x '*.la' a52dec/liba52/parse.c ../a52dec.new/liba52/parse.c +--- a52dec/liba52/parse.c 2002-07-27 18:52:07.000000000 -0700 ++++ ../a52dec.new/liba52/parse.c 2011-04-28 07:40:48.030435061 -0700 +@@ -56,16 +56,53 @@ + a52_state_t * state; + int i; + +- state = malloc (sizeof (a52_state_t)); ++ state = calloc (1, sizeof (a52_state_t)); + if (state == NULL) + return NULL; + + state->samples = memalign (16, 256 * 12 * sizeof (sample_t)); + if (state->samples == NULL) { +- free (state); +- return NULL; ++ goto fail; + } + ++ /* Root values for IFFT */ ++ state->roots16 = memalign (16, 3 * sizeof (sample_t)); ++ if (state->roots16 == NULL) ++ goto fail; ++ ++ state->roots32 = memalign (16, 7 * sizeof (sample_t)); ++ if (state->roots32 == NULL) ++ goto fail; ++ ++ state->roots64 = memalign (16, 15 * sizeof (sample_t)); ++ if (state->roots64 == NULL) ++ goto fail; ++ ++ state->roots128 = memalign (16, 31 * sizeof (sample_t)); ++ if (state->roots128 == NULL) ++ goto fail; ++ ++ /* Twiddle factors for IMDCT */ ++ state->pre1 = memalign (16, 128 * sizeof (complex_t)); ++ if (state->pre1 == NULL) ++ goto fail; ++ ++ state->post1 = memalign (16, 64 * sizeof (complex_t)); ++ if (state->post1 == NULL) ++ goto fail; ++ ++ state->pre2 = memalign (16, 64 * sizeof (complex_t)); ++ if (state->pre2 == NULL) ++ goto fail; ++ ++ state->post2 = memalign (16, 32 * sizeof (complex_t)); ++ if (state->post2 == NULL) ++ goto fail; ++ ++ state->a52_imdct_window = memalign (16, 256 * sizeof (sample_t)); ++ if (state->a52_imdct_window == NULL) ++ goto fail; ++ + for (i = 0; i < 256 * 12; i++) + state->samples[i] = 0; + +@@ -73,9 +110,27 @@ + + state->lfsr_state = 1; + +- a52_imdct_init (mm_accel); ++ a52_imdct_init (state, mm_accel); + + return state; ++ ++fail: ++ if ( state ) ++ { ++ free (state->a52_imdct_window); ++ free (state->post2); ++ free (state->pre2); ++ free (state->post1); ++ free (state->pre1); ++ free (state->roots128); ++ free (state->roots64); ++ free (state->roots32); ++ free (state->roots16); ++ free (state->samples); ++ free (state); ++ } ++ return NULL; ++ + } + + sample_t * a52_samples (a52_state_t * state) +@@ -825,7 +880,7 @@ + state->dynrng, 0, 7); + for (i = 7; i < 256; i++) + (samples-256)[i] = 0; +- a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias); ++ a52_imdct_512 (state, samples - 256, samples + 1536 - 256, state->bias); + } else { + /* just skip the LFE coefficients */ + coeff_get (state, samples + 1280, &state->lfe_expbap, &quantizer, +@@ -854,10 +909,10 @@ + + if (coeff[i]) { + if (blksw[i]) +- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, ++ a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i, + bias); + else +- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, ++ a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i, + bias); + } else { + int j; +@@ -883,11 +938,11 @@ + + if (blksw[0]) + for (i = 0; i < nfchans; i++) +- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, ++ a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i, + state->bias); + else + for (i = 0; i < nfchans; i++) +- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, ++ a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i, + state->bias); + } + +@@ -896,6 +951,15 @@ + + void a52_free (a52_state_t * state) + { ++ free (state->a52_imdct_window); ++ free (state->post2); ++ free (state->pre2); ++ free (state->post1); ++ free (state->pre1); ++ free (state->roots128); ++ free (state->roots64); ++ free (state->roots32); ++ free (state->roots16); + free (state->samples); + free (state); + } -- cgit v1.2.3