2 files changed, 251 insertions, 181 deletions
diff --git a/contrib/Jamfile b/contrib/Jamfile
index 262ed0b0c..06cb557ec 100644
--- a/contrib/Jamfile
+++ b/contrib/Jamfile
@@ -348,7 +348,7 @@ rule LibX264
     {
         LIBX264_PATCH += " $(PATCH) -p1 < ../patch-x264-solaris.patch && " ;
     }
-	# AQ is temporarily disabled    LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-aq.patch && " ;
+    LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-aq.patch && " ;
     LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-idr.patch && " ;
 	LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-vbv-1pass.patch && " ;
     LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-vbv-2pass.patch && " ;
diff --git a/contrib/patch-x264-aq.patch b/contrib/patch-x264-aq.patch
index ec624ee4d..79d8dca54 100644
--- a/contrib/patch-x264-aq.patch
+++ b/contrib/patch-x264-aq.patch
@@ -1,210 +1,241 @@
-Index: common/common.c
-===================================================================
---- common/common.c	(revision 669)
-+++ common/common.c	(working copy)
-@@ -123,6 +123,9 @@
-     param->analyse.i_chroma_qp_offset = 0;
-     param->analyse.b_fast_pskip = 1;
-     param->analyse.b_dct_decimate = 1;
-+    param->analyse.b_aq = 0;
-+    param->analyse.f_aq_strength = 0.0;
-+    param->analyse.f_aq_sensitivity = 15;
-     param->analyse.i_luma_deadzone[0] = 21;
-     param->analyse.i_luma_deadzone[1] = 11;
-     param->analyse.b_psnr = 1;
-@@ -455,6 +458,13 @@
-         p->analyse.b_fast_pskip = atobool(value);
-     OPT("dct-decimate")
-         p->analyse.b_dct_decimate = atobool(value);
-+    OPT("aq-strength")
-+    {
-+        p->analyse.f_aq_strength = atof(value);
-+        p->analyse.b_aq = (p->analyse.f_aq_strength > 0.0);
-+    }
-+    OPT("aq-sensitivity")
-+        p->analyse.f_aq_sensitivity = atof(value);
-     OPT("deadzone-inter")
-         p->analyse.i_luma_deadzone[0] = atoi(value);
-     OPT("deadzone-intra")
-@@ -939,6 +949,9 @@
-             s += sprintf( s, " zones" );
-     }
+Index: encoder/ratecontrol.h
+===================================================================
+--- encoder/ratecontrol.h	(revision 736)
++++ encoder/ratecontrol.h	(working copy)
+@@ -34,6 +34,7 @@
+ int  x264_ratecontrol_qp( x264_t * );
+ void x264_ratecontrol_end( x264_t *, int bits );
+ void x264_ratecontrol_summary( x264_t * );
++void x264_adaptive_quant    ( x264_t * );
  
-+    if( p->analyse.b_aq )
-+        s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
-+
-     return buf;
- }
- 
-Index: common/pixel.c
-===================================================================
---- common/pixel.c	(revision 669)
-+++ common/pixel.c	(working copy)
-@@ -213,6 +213,14 @@
- PIXEL_SATD_C( x264_pixel_satd_4x8,   4, 8 )
- PIXEL_SATD_C( x264_pixel_satd_4x4,   4, 4 )
- 
-+static int x264_pixel_count_8x8( uint8_t *pix, int i_pix, uint32_t threshold )
-+{
-+    int x, y, sum = 0;
-+    for( y=0; y<8; y++, pix += i_pix )
-+        for( x=0; x<8; x++ )
-+            sum += pix[x] > (uint8_t)threshold;
-+    return sum;
-+}
+ #endif
  
- /****************************************************************************
-  * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
-@@ -470,6 +478,8 @@
-     pixf->ads[PIXEL_16x8] = pixel_ads2;
-     pixf->ads[PIXEL_8x8] = pixel_ads1;
+Index: encoder/encoder.c
+===================================================================
+--- encoder/encoder.c	(revision 736)
++++ encoder/encoder.c	(working copy)
+@@ -401,6 +401,7 @@
+         h->param.analyse.b_fast_pskip = 0;
+         h->param.analyse.i_noise_reduction = 0;
+         h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
++        h->param.analyse.b_aq = 0;
+     }
+     if( h->param.rc.i_rc_method == X264_RC_CQP )
+     {
+@@ -475,6 +476,10 @@
+     if( !h->param.b_cabac )
+         h->param.analyse.i_trellis = 0;
+     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
++    h->param.analyse.b_aq = h->param.analyse.b_aq && h->param.analyse.f_aq_strength > 0;
++    /* VAQ on static sensitivity mode effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
++    if(h->param.analyse.b_aq && h->param.analyse.f_aq_sensitivity != 0) 
++        h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.analyse.f_aq_strength * 0.4 / 0.28, 0, 1);
+     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  
-+    pixf->count_8x8 = x264_pixel_count_8x8;
-+
- #ifdef HAVE_MMX
-     if( cpu&X264_CPU_MMX )
      {
-Index: common/pixel.h
-===================================================================
---- common/pixel.h	(revision 669)
-+++ common/pixel.h	(working copy)
-@@ -90,6 +90,8 @@
-     void (*ads[7])( int enc_dc[4], uint16_t *sums, int delta,
-                     uint16_t *res, int width );
- 
-+    int (*count_8x8)( uint8_t *pix, int i_pix, uint32_t threshold );
-+
-     /* calculate satd of V, H, and DC modes.
-      * may be NULL, in which case just use pred+satd instead. */
-     void (*intra_satd_x3_16x16)( uint8_t *fenc, uint8_t *fdec, int res[3] );
-Index: encoder/analyse.c
-===================================================================
---- encoder/analyse.c	(revision 669)
-+++ encoder/analyse.c	(working copy)
-@@ -29,6 +29,7 @@
- #endif
+Index: encoder/ratecontrol.c
+===================================================================
+--- encoder/ratecontrol.c	(revision 736)
++++ encoder/ratecontrol.c	(working copy)
+@@ -127,6 +127,10 @@
+     predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
+     int bframes;                /* # consecutive B-frames before this P-frame */
+     int bframe_bits;            /* total cost of those frames */
++    
++    /* AQ state: log-energy threshold that per-MB QP adjustments pivot on, and a per-MB AC energy cache */
++    float aq_threshold;
++    int *ac_energy;
  
- #include "common/common.h"
-+#include "common/cpu.h"
- #include "macroblock.h"
- #include "me.h"
- #include "ratecontrol.h"
-@@ -2029,8 +2030,68 @@
-     }
+     int i_zones;
+     x264_zone_t *zones;
+@@ -169,7 +173,97 @@
+            + rce->misc_bits;
  }
  
-+static int x264_sum_dctq( int16_t dct[8][8] )
++// Return the AC energy of macroblock (mb_x,mb_y) summed over all three planes (16x16 for plane 0, 8x8 for planes 1-2); if satd is non-NULL, also accumulate a DC-excluded SATD into *satd.
++static int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
 +{
-+    int i, t = 0;
-+    int16_t *p = &dct[0][0];
-+    for( i=1; i<64; i++ )
-+        t += abs(p[i]) * x264_dct8_weight_tab[i];
-+    return t;
++    DECLARE_ALIGNED( static uint8_t, flat[16], 16 ) = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128};
++    // flat is a single row of 128s; passing it with stride 0 makes it act as a uniform reference block of any height.
++    unsigned int var=0, sad, ssd, i;
++    for( i=0; i<3; i++ )
++    {
++        int w = i ? 8 : 16;
++        int stride = h->fenc->i_stride[i];
++        int offset = h->mb.b_interlaced
++            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
++            : w * (mb_x + mb_y * stride);
++        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
++        stride <<= h->mb.b_interlaced; // row step doubles when b_interlaced is 1
++        sad = h->pixf.sad[pix](flat, 0, h->fenc->plane[i]+offset, stride);
++        ssd = h->pixf.ssd[pix](flat, 0, h->fenc->plane[i]+offset, stride);
++        var += ssd - (sad * sad >> (i?6:8)); // ssd - sad^2/N, with N = pixels in the block (256 or 64)
++        // SATD represents the block's overall complexity (bit cost) for intra encoding.
++        // Exclude the DC coef (the sad/2 term), because nothing short of an actual intra prediction will estimate DC cost.
++        if( var && satd )
++            *satd += h->pixf.satd[pix](flat, 0, h->fenc->plane[i]+offset, stride) - sad/2;
++    }
++    return var;
++}
++ 
++void x264_autosense_aq( x264_t *h )
++{   /* set rc->aq_threshold to the SATD-weighted mean of log(AC energy) over all MBs, caching each MB's energy in rc->ac_energy */
++    double total = 0;
++    double n = 0;
++    int mb_x, mb_y;
++    /* FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?).  Can we reuse them? */
++    /* FIXME: Is chroma SATD necessary? */
++    for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
++        for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
++        {
++            int energy, satd=0;
++            energy = ac_energy_mb( h, mb_x, mb_y, &satd );
++            h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
++            /* Weight each MB's log-energy by its SATD: the more complex blocks in a frame
++             * should count for more when calculating the optimal sensitivity.  This also
++             * diminishes the negative effect of large numbers of simple blocks in a frame,
++             * such as in the case of a letterboxed film. */
++            if( energy )
++            {
++                x264_cpu_restore(h->param.cpu); /* NOTE(review): presumably resets SIMD/FPU state before the float math below -- confirm */
++                total += logf(energy) * satd;
++                n += satd;
++            }
++        }
++    x264_cpu_restore(h->param.cpu);
++    /* Store the threshold; fall back to 15 when no MB contributed any weight (n == 0). */
++    h->rc->aq_threshold = n ? total/n : 15;
 +}
  
- /*****************************************************************************
-+ * x264_adaptive_quant:
-+ * check if mb is "flat", i.e. has most energy in low frequency components, and
-+ * adjust qp down if it is
-+ *****************************************************************************/
-+void x264_adaptive_quant( x264_t *h, x264_mb_analysis_t *a )
++/*****************************************************************************
++ * x264_adaptive_quant:
++ * adjust macroblock QP based on variance (AC energy) of the MB.
++ * high variance = higher QP
++ * low variance  = lower QP
++ * This generally increases SSIM and lowers PSNR.
++ *****************************************************************************/
++void x264_adaptive_quant( x264_t *h )
 +{
-+    DECLARE_ALIGNED( static uint8_t, zero[FDEC_STRIDE*8], 16 );
-+    DECLARE_ALIGNED( int16_t, dct[8][8], 16 );
-+    float fc;
-+    int total = 0;
-+    int qp = h->mb.i_qp, qp_adj;
-+    int i;
-+
-+    if( qp <= 10 ) /* AQ is probably not needed at such low QP */
-+        return;
-+
-+    if( h->pixf.sad[PIXEL_16x16](h->mb.pic.p_fenc[0], FENC_STRIDE, zero, 16) > 64*16*16 )
-+    {   /* light places */
-+        if( h->pixf.count_8x8(h->mb.pic.p_fenc[1], FENC_STRIDE, 0x81818181) < 40 )
-+            /* not enough "blue" pixels */
-+            return;
-+
-+        if( h->pixf.count_8x8(h->mb.pic.p_fenc[2], FENC_STRIDE, 0x87878787) > 24 )
-+            /* too many "red" pixels */
-+            return;
++    int qp = h->mb.i_qp;
++    int energy;
++    x264_cpu_restore(h->param.cpu);
++    if(h->param.analyse.f_aq_sensitivity != 0)
++        energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
++    else
++        energy = h->rc->ac_energy[h->mb.i_mb_xy];
++    if(energy == 0)
++    {
++        h->mb.i_qp = h->mb.i_last_qp;
 +    }
-+
-+    for( i=0; i<4; i++ )
++    else
 +    {
-+        h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + (i&1)*8 + (i>>1)*FENC_STRIDE, zero );
-+        total += x264_sum_dctq( dct );
++        x264_cpu_restore(h->param.cpu);
++        float result = energy;
++        /* Adjust the QP based on the AC energy of the macroblock. */
++        float qp_adj = 3 * (logf(result) - h->rc->aq_threshold);
++        if(h->param.analyse.f_aq_sensitivity == 0) qp_adj = x264_clip3f(qp_adj, -5, 5);
++        int new_qp = x264_clip3(qp + qp_adj * h->param.analyse.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
++        /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
++         * to lower the bit cost of the qp_delta. */
++        if(abs(new_qp - h->mb.i_last_qp) == 1) new_qp = h->mb.i_last_qp;
++        h->mb.i_qp = new_qp;
 +    }
-+
-+    if( total == 0 ) /* no AC coefficients, nothing to do */
-+        return;
-+
-+    x264_cpu_restore( h->param.cpu );
-+
-+    fc = expf(-5e-13 * total * total);
-+
-+    /* the function is chosen such that it stays close to 0 in almost all
-+      * range of 0..1, and rapidly goes up to 1 near 1.0 */
-+    qp_adj = (int)(qp * h->param.analyse.f_aq_strength / pow(2 - fc, h->param.analyse.f_aq_sensitivity));
-+
-+    /* don't adjust by more than this amount */
-+    qp_adj = X264_MIN(qp_adj, qp/2);
-+
-+    h->mb.i_qp = a->i_qp = qp - qp_adj;
 +    h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 +}
 +
-+/*****************************************************************************
-  * x264_macroblock_analyse:
-  *****************************************************************************/
- void x264_macroblock_analyse( x264_t *h )
-@@ -2038,9 +2099,14 @@
-     x264_mb_analysis_t analysis;
+ int x264_ratecontrol_new( x264_t *h )
+ {
+     x264_ratecontrol_t *rc;
+@@ -244,7 +338,7 @@
+         rc->rate_tolerance = 0.01;
+     }
+ 
+-    h->mb.b_variable_qp = rc->b_vbv && !rc->b_2pass;
++    h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.analyse.b_aq;
+ 
+     if( rc->b_abr )
+     {
+@@ -458,10 +552,13 @@
+         x264_free( p );
+     }
+ 
+-    for( i=1; i<h->param.i_threads; i++ )
++    for( i=0; i<h->param.i_threads; i++ )
+     {
+         h->thread[i]->rc = rc+i;
+-        rc[i] = rc[0];
++        if( i )
++            rc[i] = rc[0];
++        if( h->param.analyse.b_aq )
++            rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
+     }
+ 
+     return 0;
+@@ -623,6 +720,8 @@
+                     x264_free( rc->zones[i].param );
+         x264_free( rc->zones );
+     }
++    for( i=0; i<h->param.i_threads; i++ )
++        x264_free( rc[i].ac_energy );
+     x264_free( rc );
+ }
+ 
+@@ -729,6 +828,15 @@
+ 
+     if( h->sh.i_type != SLICE_TYPE_B )
+         rc->last_non_b_pict_type = h->sh.i_type;
++        
++    /* Adaptive AQ sensitivity algorithm. */
++    if( h->param.analyse.b_aq )
++    {
++        if( h->param.analyse.f_aq_sensitivity > 0 ) 
++            h->rc->aq_threshold = logf(powf(h->param.analyse.f_aq_sensitivity,4)/2); //FIXME simplify
++        else
++            x264_autosense_aq(h);
++    }
+ }
+ 
+ double predict_row_size( x264_t *h, int y, int qp )
+Index: encoder/analyse.c
+===================================================================
+--- encoder/analyse.c	(revision 736)
++++ encoder/analyse.c	(working copy)
+@@ -2047,8 +2047,13 @@
      int i_cost = COST_MAX;
      int i;
-+	
-+    h->mb.i_qp = x264_ratecontrol_qp( h );
  
-+    if( h->param.analyse.b_aq )
-+        x264_adaptive_quant( h, &analysis );
-+
-     /* init analysis */
+-    /* init analysis */
 -    x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
++    h->mb.i_qp = x264_ratecontrol_qp( h );
++    
++    if( h->param.analyse.b_aq )
++        x264_adaptive_quant( h );
++ 
++     /* init analysis */
 +    x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
  
      /*--------------------------- Do the analysis ---------------------------*/
      if( h->sh.i_type == SLICE_TYPE_I )
-Index: encoder/encoder.c
-===================================================================
---- encoder/encoder.c	(revision 669)
-+++ encoder/encoder.c	(working copy)
-@@ -477,6 +477,8 @@
-     if( !h->param.b_cabac )
-         h->param.analyse.i_trellis = 0;
-     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
-+    if( h->param.analyse.b_aq && h->param.analyse.f_aq_strength <= 0 )
-+        h->param.analyse.b_aq = 0;
-     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
- 
-     {
-Index: x264.c
-===================================================================
---- x264.c	(revision 669)
-+++ x264.c	(working copy)
-@@ -243,6 +243,12 @@
+Index: x264.c
+===================================================================
+--- x264.c	(revision 736)
++++ x264.c	(working copy)
+@@ -244,6 +244,14 @@
          "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
      H0( "      --no-fast-pskip         Disables early SKIP detection on P-frames\n" );
      H0( "      --no-dct-decimate       Disables coefficient thresholding on P-frames\n" );
-+    H0( "      --aq-strength <float>   Amount to adjust QP per MB [%.1f]\n"
++    H0( "      --aq-strength <float>   Amount to adjust QP/lambda per MB [%.1f]\n"
 +        "                                  0.0: no AQ\n"
-+        "                                  1.1: strong AQ\n", defaults->analyse.f_aq_strength );
-+    H0( "      --aq-sensitivity <float> \"Flatness\" threshold to trigger AQ [%.1f]\n"
-+        "                                    5: applies to almost all blocks\n"
-+        "                                   22: only flat blocks\n", defaults->analyse.f_aq_sensitivity );
++        "                                  1.0: medium AQ\n", defaults->analyse.f_aq_strength );
++    H0( "      --aq-sensitivity <float> \"Center\" of AQ curve. [%.1f]\n"
++        "               0: automatic sensitivity (avoids moving bits between frames)\n"
++        "               10: most QPs are raised\n"
++        "               20: good general-use sensitivity\n"
++        "               30: most QPs are lowered\n", defaults->analyse.f_aq_sensitivity );
      H0( "      --nr <integer>          Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
      H1( "\n" );
      H1( "      --deadzone-inter <int>  Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
-@@ -406,6 +412,8 @@
+@@ -407,6 +415,8 @@
              { "trellis", required_argument, NULL, 't' },
              { "no-fast-pskip", no_argument, NULL, 0 },
              { "no-dct-decimate", no_argument, NULL, 0 },
@@ -213,11 +244,50 @@ Index: x264.c
              { "deadzone-inter", required_argument, NULL, '0' },
              { "deadzone-intra", required_argument, NULL, '0' },
              { "level",   required_argument, NULL, 0 },
-Index: x264.h
-===================================================================
---- x264.h	(revision 669)
-+++ x264.h	(working copy)
-@@ -227,6 +227,9 @@
+Index: common/common.c
+===================================================================
+--- common/common.c	(revision 736)
++++ common/common.c	(working copy)
+@@ -123,6 +123,9 @@
+     param->analyse.i_chroma_qp_offset = 0;
+     param->analyse.b_fast_pskip = 1;
+     param->analyse.b_dct_decimate = 1;
++    param->analyse.b_aq = 1;
++    param->analyse.f_aq_strength = 0.5;
++    param->analyse.f_aq_sensitivity = 13;
+     param->analyse.i_luma_deadzone[0] = 21;
+     param->analyse.i_luma_deadzone[1] = 11;
+     param->analyse.b_psnr = 1;
+@@ -455,6 +458,13 @@
+         p->analyse.b_fast_pskip = atobool(value);
+     OPT("dct-decimate")
+         p->analyse.b_dct_decimate = atobool(value);
++    OPT("aq-strength")
++    {
++        p->analyse.f_aq_strength = atof(value);
++        p->analyse.b_aq = 1;
++    }
++    OPT("aq-sensitivity")
++        p->analyse.f_aq_sensitivity = atof(value);
+     OPT("deadzone-inter")
+         p->analyse.i_luma_deadzone[0] = atoi(value);
+     OPT("deadzone-intra")
+@@ -883,6 +893,10 @@
+         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
+         if( p->i_bframe )
+             s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
++        if( p->analyse.b_aq )
++            s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
++        else
++            s += sprintf( s, " aq=0" );
+         if( p->rc.psz_zones )
+             s += sprintf( s, " zones=%s", p->rc.psz_zones );
+         else if( p->rc.i_zones )
+Index: x264.h
+===================================================================
+--- x264.h	(revision 736)
++++ x264.h	(working copy)
+@@ -232,6 +232,9 @@
          int          i_trellis;  /* trellis RD quantization */
          int          b_fast_pskip; /* early SKIP detection on P-frames */
          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */