diff options
Diffstat (limited to 'contrib/patch-x264-linux.patch')
-rw-r--r-- | contrib/patch-x264-linux.patch | 194 |
1 files changed, 194 insertions, 0 deletions
diff --git a/contrib/patch-x264-linux.patch b/contrib/patch-x264-linux.patch new file mode 100644 index 000000000..3f1752d4c --- /dev/null +++ b/contrib/patch-x264-linux.patch @@ -0,0 +1,194 @@ +Index: /common/ppc/quant.c +=================================================================== +--- /common/ppc/quant.c (revision 601) ++++ /common/ppc/quant.c (revision 621) +@@ -18,8 +18,4 @@ + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ +- +-#ifdef HAVE_ALTIVEC_H +-#include <altivec.h> +-#endif + + #include "common/common.h" +@@ -54,29 +50,29 @@ + temp2v = vec_xor(temp2v, mskB); \ + temp1v = vec_adds(temp1v, vec_and(mskA, one)); \ +-vec_st(temp1v, (dct0), dct); \ ++vec_st(temp1v, (dct0), (int16_t*)dct); \ + temp2v = vec_adds(temp2v, vec_and(mskB, one)); \ +-vec_st(temp2v, (dct1), dct); ++vec_st(temp2v, (dct1), (int16_t*)dct); + + void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f ) { + vector bool short mskA; +- vec_s32_t i_qbitsv; ++ vec_u32_t i_qbitsv; + vec_u16_t coefvA; + vec_u32_t multEvenvA, multOddvA; +- vec_u32_t mfvA; ++ vec_u16_t mfvA; + vec_s16_t zerov, one; +- vec_s32_t fV; ++ vec_u32_t fV; + + vector bool short mskB; + vec_u16_t coefvB; + vec_u32_t multEvenvB, multOddvB; +- vec_u32_t mfvB; ++ vec_u16_t mfvB; + + vec_s16_t temp1v, temp2v; + +- vect_sint_u qbits_u; ++ vect_int_u qbits_u; + qbits_u.s[0]=i_qbits; + i_qbitsv = vec_splat(qbits_u.v, 0); + +- vect_sint_u f_u; ++ vect_int_u f_u; + f_u.s[0]=f; + +@@ -114,16 +110,16 @@ + temp2v = vec_xor(temp2v, mskB); \ + temp1v = vec_add(temp1v, vec_and(mskA, one)); \ +-vec_st(temp1v, (dct0), dct); \ ++vec_st(temp1v, (dct0), (int16_t*)dct); \ + temp2v = vec_add(temp2v, vec_and(mskB, one)); \ +-vec_st(temp2v, (dct1), dct); ++vec_st(temp2v, (dct1), (int16_t*)dct); + + + void x264_quant_4x4_dc_altivec( int16_t dct[4][4], int i_quant_mf, int const i_qbits, int const f ) { + vector bool short mskA; +- vec_s32_t i_qbitsv; ++ vec_u32_t i_qbitsv; + vec_u16_t coefvA; + vec_u32_t multEvenvA, multOddvA; + vec_s16_t zerov, one; +- vec_s32_t fV; ++ vec_u32_t fV; + + vector bool short mskB; +@@ -133,15 +129,14 @@ + vec_s16_t temp1v, temp2v; + +- vec_u32_t mfv; +- vect_int_u mf_u; ++ vec_u16_t mfv; ++ vect_ushort_u mf_u; + mf_u.s[0]=i_quant_mf; + mfv = vec_splat( mf_u.v, 0 ); +- mfv = vec_packs( mfv, mfv); + +- vect_sint_u qbits_u; ++ vect_int_u qbits_u; + qbits_u.s[0]=i_qbits; + i_qbitsv = vec_splat(qbits_u.v, 0); + +- vect_sint_u f_u; ++ vect_int_u f_u; + f_u.s[0]=f; + fV = vec_splat(f_u.v, 0); +@@ -156,13 +151,15 @@ + void x264_quant_8x8_altivec( int16_t dct[8][8], int quant_mf[8][8], int const i_qbits, int const f ) { + vector bool short mskA; +- vec_s32_t i_qbitsv; ++ vec_u32_t i_qbitsv; + vec_u16_t coefvA; +- vec_s32_t multEvenvA, multOddvA, mfvA; ++ vec_u32_t multEvenvA, multOddvA; ++ vec_u16_t mfvA; + vec_s16_t zerov, one; +- vec_s32_t fV; ++ vec_u32_t fV; + + vector bool short mskB; + vec_u16_t coefvB; +- vec_u32_t multEvenvB, multOddvB, mfvB; ++ vec_u32_t multEvenvB, multOddvB; ++ vec_u16_t mfvB; + + vec_s16_t temp1v, temp2v; +@@ -172,5 +169,5 @@ + i_qbitsv = vec_splat(qbits_u.v, 0); + +- vect_sint_u f_u; ++ vect_int_u f_u; + f_u.s[0]=f; + fV = vec_splat(f_u.v, 0); +Index: /common/ppc/dct.c +=================================================================== +--- /common/ppc/dct.c (revision 604) ++++ /common/ppc/dct.c (revision 621) +@@ -61,6 +61,6 @@ + VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v ); + +- vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct); +- vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct); ++ vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, (int16_t*)dct); ++ vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct); + } + +@@ -95,12 +95,12 @@ + VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v ); + +- vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct); +- vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct); +- vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, dct); +- vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, dct); +- vec_st(vec_perm(tmp0v, tmp1v, permLowv), 64, dct); +- vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, dct); +- vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, dct); +- vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, dct); ++ vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, (int16_t*)dct); ++ vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct); ++ vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, (int16_t*)dct); ++ vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, (int16_t*)dct); ++ vec_st(vec_perm(tmp0v, tmp1v, permLowv), 64, (int16_t*)dct); ++ vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, (int16_t*)dct); ++ vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, (int16_t*)dct); ++ vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, (int16_t*)dct); + } + +@@ -312,6 +312,6 @@ + void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] ) + { +- vec_s16_t onev = vec_splat_s16(1); +- vec_s16_t twov = vec_splat_s16(2); ++ vec_u16_t onev = vec_splat_s16(1); ++ vec_u16_t twov = vec_splat_s16(2); + + dct[0][0] += 32; // rounding for the >>6 at the end +@@ -342,5 +342,5 @@ + vec_u8_t perm_ldv = vec_lvsl(0, dst); + vec_u8_t perm_stv = vec_lvsr(8, dst); +- vec_s16_t sixv = vec_splat_s16(6); ++ vec_u16_t sixv = vec_splat_s16(6); + const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1); + LOAD_ZERO; +Index: /common/ppc/quant.h +=================================================================== +--- /common/ppc/quant.h (revision 601) ++++ /common/ppc/quant.h (revision 621) +@@ -19,4 +19,8 @@ + *****************************************************************************/ + ++#ifdef SYS_LINUX ++#include <altivec.h> ++#endif ++ + #ifndef _PPC_QUANT_H + #define _PPC_QUANT_H 1 +@@ -28,8 +32,7 @@ + + typedef union { +- signed int s[4]; +- vector signed int v; +-} vect_sint_u; +- ++ unsigned short s[8]; ++ vector unsigned short v; ++} vect_ushort_u; + + void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f ); |