Applies Dark_Shikari's Variable Adaptive Quantization (VAQ) patch v0.48 to libhb's copy of libx264. This major change to x264 drastically reduces blockiness in flat parts of the picture, and also reduces blurring. It is on by default; to disable it, add "aq-strength=0" to your x264 options string. For now, the patch is only applied in jam builds; support for make builds will come later.

git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@1333 b64f7644-9d1e-0410-96f1-a4d463321fa5
author: jbrjake <[email protected]> 2008-03-07 15:28:52 +0000
committer: jbrjake <[email protected]> 2008-03-07 15:28:52 +0000
commit: 8463a0b011665a2da7ef549f155d9f935971a94a (patch)
tree: c7f195d602f9831b65c7eca22a1a33837b823e21
parent: 8cf6e82be22656e77e8c4a723b1507a187fa924b (diff)
2 files changed, 251 insertions, 181 deletions
diff --git a/contrib/Jamfile b/contrib/Jamfile
index 262ed0b0c..06cb557ec 100644
--- a/contrib/Jamfile
+++ b/contrib/Jamfile
@@ -348,7 +348,7 @@ rule LibX264
     {
         LIBX264_PATCH += " $(PATCH) -p1 < ../patch-x264-solaris.patch && " ;
     }
-	# AQ is temporarily disabled    LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-aq.patch && " ;
+    LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-aq.patch && " ;
     LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-idr.patch && " ;
 	LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-vbv-1pass.patch && " ;
     LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-vbv-2pass.patch && " ;
diff --git a/contrib/patch-x264-aq.patch b/contrib/patch-x264-aq.patch
index ec624ee4d..79d8dca54 100644
--- a/contrib/patch-x264-aq.patch
+++ b/contrib/patch-x264-aq.patch
@@ -1,210 +1,241 @@
-Index: common/common.c
-===================================================================
---- common/common.c	(revision 669)
-+++ common/common.c	(working copy)
-@@ -123,6 +123,9 @@
-     param->analyse.i_chroma_qp_offset = 0;
-     param->analyse.b_fast_pskip = 1;
-     param->analyse.b_dct_decimate = 1;
-+    param->analyse.b_aq = 0;
-+    param->analyse.f_aq_strength = 0.0;
-+    param->analyse.f_aq_sensitivity = 15;
-     param->analyse.i_luma_deadzone[0] = 21;
-     param->analyse.i_luma_deadzone[1] = 11;
-     param->analyse.b_psnr = 1;
-@@ -455,6 +458,13 @@
-         p->analyse.b_fast_pskip = atobool(value);
-     OPT("dct-decimate")
-         p->analyse.b_dct_decimate = atobool(value);
-+    OPT("aq-strength")
-+    {
-+        p->analyse.f_aq_strength = atof(value);
-+        p->analyse.b_aq = (p->analyse.f_aq_strength > 0.0);
-+    }
-+    OPT("aq-sensitivity")
-+        p->analyse.f_aq_sensitivity = atof(value);
-     OPT("deadzone-inter")
-         p->analyse.i_luma_deadzone[0] = atoi(value);
-     OPT("deadzone-intra")
-@@ -939,6 +949,9 @@
-             s += sprintf( s, " zones" );
-     }
+Index: encoder/ratecontrol.h
+===================================================================
+--- encoder/ratecontrol.h	(revision 736)
++++ encoder/ratecontrol.h	(working copy)
+@@ -34,6 +34,7 @@
+ int  x264_ratecontrol_qp( x264_t * );
+ void x264_ratecontrol_end( x264_t *, int bits );
+ void x264_ratecontrol_summary( x264_t * );
++void x264_adaptive_quant    ( x264_t * );
  
-+    if( p->analyse.b_aq )
-+        s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
-+
-     return buf;
- }
- 
-Index: common/pixel.c
-===================================================================
---- common/pixel.c	(revision 669)
-+++ common/pixel.c	(working copy)
-@@ -213,6 +213,14 @@
- PIXEL_SATD_C( x264_pixel_satd_4x8,   4, 8 )
- PIXEL_SATD_C( x264_pixel_satd_4x4,   4, 4 )
- 
-+static int x264_pixel_count_8x8( uint8_t *pix, int i_pix, uint32_t threshold )
-+{
-+    int x, y, sum = 0;
-+    for( y=0; y<8; y++, pix += i_pix )
-+        for( x=0; x<8; x++ )
-+            sum += pix[x] > (uint8_t)threshold;
-+    return sum;
-+}
+ #endif
  
- /****************************************************************************
-  * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
-@@ -470,6 +478,8 @@
-     pixf->ads[PIXEL_16x8] = pixel_ads2;
-     pixf->ads[PIXEL_8x8] = pixel_ads1;
+Index: encoder/encoder.c
+===================================================================
+--- encoder/encoder.c	(revision 736)
++++ encoder/encoder.c	(working copy)
+@@ -401,6 +401,7 @@
+         h->param.analyse.b_fast_pskip = 0;
+         h->param.analyse.i_noise_reduction = 0;
+         h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
++        h->param.analyse.b_aq = 0;
+     }
+     if( h->param.rc.i_rc_method == X264_RC_CQP )
+     {
+@@ -475,6 +476,10 @@
+     if( !h->param.b_cabac )
+         h->param.analyse.i_trellis = 0;
+     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
++    h->param.analyse.b_aq = h->param.analyse.b_aq && h->param.analyse.f_aq_strength > 0;
++    /* VAQ on static sensitivity mode effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
++    if(h->param.analyse.b_aq && h->param.analyse.f_aq_sensitivity != 0) 
++        h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.analyse.f_aq_strength * 0.4 / 0.28, 0, 1);
+     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  
-+    pixf->count_8x8 = x264_pixel_count_8x8;
-+
- #ifdef HAVE_MMX
-     if( cpu&X264_CPU_MMX )
      {
-Index: common/pixel.h
-===================================================================
---- common/pixel.h	(revision 669)
-+++ common/pixel.h	(working copy)
-@@ -90,6 +90,8 @@
-     void (*ads[7])( int enc_dc[4], uint16_t *sums, int delta,
-                     uint16_t *res, int width );
- 
-+    int (*count_8x8)( uint8_t *pix, int i_pix, uint32_t threshold );
-+
-     /* calculate satd of V, H, and DC modes.
-      * may be NULL, in which case just use pred+satd instead. */
-     void (*intra_satd_x3_16x16)( uint8_t *fenc, uint8_t *fdec, int res[3] );
-Index: encoder/analyse.c
-===================================================================
---- encoder/analyse.c	(revision 669)
-+++ encoder/analyse.c	(working copy)
-@@ -29,6 +29,7 @@
- #endif
+Index: encoder/ratecontrol.c
+===================================================================
+--- encoder/ratecontrol.c	(revision 736)
++++ encoder/ratecontrol.c	(working copy)
+@@ -127,6 +127,10 @@
+     predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
+     int bframes;                /* # consecutive B-frames before this P-frame */
+     int bframe_bits;            /* total cost of those frames */
++    
++    /* AQ stuff */
++    float aq_threshold;
++    int *ac_energy;
  
- #include "common/common.h"
-+#include "common/cpu.h"
- #include "macroblock.h"
- #include "me.h"
- #include "ratecontrol.h"
-@@ -2029,8 +2030,68 @@
-     }
+     int i_zones;
+     x264_zone_t *zones;
+@@ -169,7 +173,97 @@
+            + rce->misc_bits;
  }
  
-+static int x264_sum_dctq( int16_t dct[8][8] )
++// Find the total AC energy of the block in all planes.
++static int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
 +{
-+    int i, t = 0;
-+    int16_t *p = &dct[0][0];
-+    for( i=1; i<64; i++ )
-+        t += abs(p[i]) * x264_dct8_weight_tab[i];
-+    return t;
++    DECLARE_ALIGNED( static uint8_t, flat[16], 16 ) = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128};
++//  DECLARE_ALIGNED( static uint8_t, flat[16], 16 );
++    unsigned int var=0, sad, ssd, i;
++    for( i=0; i<3; i++ )
++    {
++        int w = i ? 8 : 16;
++        int stride = h->fenc->i_stride[i];
++        int offset = h->mb.b_interlaced
++            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
++            : w * (mb_x + mb_y * stride);
++        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
++        stride <<= h->mb.b_interlaced;
++        sad = h->pixf.sad[pix](flat, 0, h->fenc->plane[i]+offset, stride);
++        ssd = h->pixf.ssd[pix](flat, 0, h->fenc->plane[i]+offset, stride);
++        var += ssd - (sad * sad >> (i?6:8));
++        // SATD to represent the block's overall complexity (bit cost) for intra encoding.
++        // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
++        if( var && satd )
++            *satd += h->pixf.satd[pix](flat, 0, h->fenc->plane[i]+offset, stride) - sad/2;
++    }
++    return var;
++}
++ 
++void x264_autosense_aq( x264_t *h )
++{
++    double total = 0;
++    double n = 0;
++    int mb_x, mb_y;
++    /* FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?).  Can we reuse them? */
++    /* FIXME: Is chroma SATD necessary? */
++    for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
++        for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
++        {
++            int energy, satd=0;
++            energy = ac_energy_mb( h, mb_x, mb_y, &satd );
++            h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
++            /* Weight the energy value by the SATD value of the MB.  This represents the fact that
++            the more complex blocks in a frame should be weighted more when calculating the optimal sensitivity. 
++            This also helps diminish the negative effect of large numbers of simple blocks in a frame, such as in the case
++            of a letterboxed film. */
++            if( energy )
++            {
++                x264_cpu_restore(h->param.cpu);
++                total += logf(energy) * satd;
++                n += satd;
++            }
++        }
++    x264_cpu_restore(h->param.cpu);
++    /* Calculate and store the threshold. */
++    h->rc->aq_threshold = n ? total/n : 15;
 +}
  
- /*****************************************************************************
-+ * x264_adaptive_quant:
-+ * check if mb is "flat", i.e. has most energy in low frequency components, and
-+ * adjust qp down if it is
-+ *****************************************************************************/
-+void x264_adaptive_quant( x264_t *h, x264_mb_analysis_t *a )
++/*****************************************************************************
++* x264_adaptive_quant:
++ * adjust macroblock QP based on variance (AC energy) of the MB.
++ * high variance  = higher QP
++ * low variance = lower QP
++ * This generally increases SSIM and lowers PSNR.
++*****************************************************************************/
++void x264_adaptive_quant( x264_t *h )
 +{
-+    DECLARE_ALIGNED( static uint8_t, zero[FDEC_STRIDE*8], 16 );
-+    DECLARE_ALIGNED( int16_t, dct[8][8], 16 );
-+    float fc;
-+    int total = 0;
-+    int qp = h->mb.i_qp, qp_adj;
-+    int i;
-+
-+    if( qp <= 10 ) /* AQ is probably not needed at such low QP */
-+        return;
-+
-+    if( h->pixf.sad[PIXEL_16x16](h->mb.pic.p_fenc[0], FENC_STRIDE, zero, 16) > 64*16*16 )
-+    {   /* light places */
-+        if( h->pixf.count_8x8(h->mb.pic.p_fenc[1], FENC_STRIDE, 0x81818181) < 40 )
-+            /* not enough "blue" pixels */
-+            return;
-+
-+        if( h->pixf.count_8x8(h->mb.pic.p_fenc[2], FENC_STRIDE, 0x87878787) > 24 )
-+            /* too many "red" pixels */
-+            return;
++    int qp = h->mb.i_qp;
++    int energy;
++    x264_cpu_restore(h->param.cpu);
++    if(h->param.analyse.f_aq_sensitivity != 0)
++        energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
++    else
++        energy = h->rc->ac_energy[h->mb.i_mb_xy];
++    if(energy == 0)
++    {
++        h->mb.i_qp = h->mb.i_last_qp;
 +    }
-+
-+    for( i=0; i<4; i++ )
++    else
 +    {
-+        h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + (i&1)*8 + (i>>1)*FENC_STRIDE, zero );
-+        total += x264_sum_dctq( dct );
++        x264_cpu_restore(h->param.cpu);
++        float result = energy;
++        /* Adjust the QP based on the AC energy of the macroblock. */
++        float qp_adj = 3 * (logf(result) - h->rc->aq_threshold);
++        if(h->param.analyse.f_aq_sensitivity == 0) qp_adj = x264_clip3f(qp_adj, -5, 5);
++        int new_qp = x264_clip3(qp + qp_adj * h->param.analyse.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
++        /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
++         * to lower the bit cost of the qp_delta. */
++        if(abs(new_qp - h->mb.i_last_qp) == 1) new_qp = h->mb.i_last_qp;
++        h->mb.i_qp = new_qp;
 +    }
-+
-+    if( total == 0 ) /* no AC coefficients, nothing to do */
-+        return;
-+
-+    x264_cpu_restore( h->param.cpu );
-+
-+    fc = expf(-5e-13 * total * total);
-+
-+    /* the function is chosen such that it stays close to 0 in almost all
-+      * range of 0..1, and rapidly goes up to 1 near 1.0 */
-+    qp_adj = (int)(qp * h->param.analyse.f_aq_strength / pow(2 - fc, h->param.analyse.f_aq_sensitivity));
-+
-+    /* don't adjust by more than this amount */
-+    qp_adj = X264_MIN(qp_adj, qp/2);
-+
-+    h->mb.i_qp = a->i_qp = qp - qp_adj;
 +    h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 +}
 +
-+/*****************************************************************************
-  * x264_macroblock_analyse:
-  *****************************************************************************/
- void x264_macroblock_analyse( x264_t *h )
-@@ -2038,9 +2099,14 @@
-     x264_mb_analysis_t analysis;
+ int x264_ratecontrol_new( x264_t *h )
+ {
+     x264_ratecontrol_t *rc;
+@@ -244,7 +338,7 @@
+         rc->rate_tolerance = 0.01;
+     }
+ 
+-    h->mb.b_variable_qp = rc->b_vbv && !rc->b_2pass;
++    h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.analyse.b_aq;
+ 
+     if( rc->b_abr )
+     {
+@@ -458,10 +552,13 @@
+         x264_free( p );
+     }
+ 
+-    for( i=1; i<h->param.i_threads; i++ )
++    for( i=0; i<h->param.i_threads; i++ )
+     {
+         h->thread[i]->rc = rc+i;
+-        rc[i] = rc[0];
++        if( i )
++            rc[i] = rc[0];
++        if( h->param.analyse.b_aq )
++            rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
+     }
+ 
+     return 0;
+@@ -623,6 +720,8 @@
+                     x264_free( rc->zones[i].param );
+         x264_free( rc->zones );
+     }
++    for( i=0; i<h->param.i_threads; i++ )
++        x264_free( rc[i].ac_energy );
+     x264_free( rc );
+ }
+ 
+@@ -729,6 +828,15 @@
+ 
+     if( h->sh.i_type != SLICE_TYPE_B )
+         rc->last_non_b_pict_type = h->sh.i_type;
++        
++    /* Adaptive AQ sensitivity algorithm. */
++    if( h->param.analyse.b_aq )
++    {
++        if( h->param.analyse.f_aq_sensitivity > 0 ) 
++            h->rc->aq_threshold = logf(powf(h->param.analyse.f_aq_sensitivity,4)/2); //FIXME simplify
++        else
++            x264_autosense_aq(h);
++    }
+ }
+ 
+ double predict_row_size( x264_t *h, int y, int qp )
+Index: encoder/analyse.c
+===================================================================
+--- encoder/analyse.c	(revision 736)
++++ encoder/analyse.c	(working copy)
+@@ -2047,8 +2047,13 @@
      int i_cost = COST_MAX;
      int i;
-+	
-+    h->mb.i_qp = x264_ratecontrol_qp( h );
  
-+    if( h->param.analyse.b_aq )
-+        x264_adaptive_quant( h, &analysis );
-+
-     /* init analysis */
+-    /* init analysis */
 -    x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
++    h->mb.i_qp = x264_ratecontrol_qp( h );
++    
++    if( h->param.analyse.b_aq )
++        x264_adaptive_quant( h );
++ 
++     /* init analysis */
 +    x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
  
      /*--------------------------- Do the analysis ---------------------------*/
      if( h->sh.i_type == SLICE_TYPE_I )
-Index: encoder/encoder.c
-===================================================================
---- encoder/encoder.c	(revision 669)
-+++ encoder/encoder.c	(working copy)
-@@ -477,6 +477,8 @@
-     if( !h->param.b_cabac )
-         h->param.analyse.i_trellis = 0;
-     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
-+    if( h->param.analyse.b_aq && h->param.analyse.f_aq_strength <= 0 )
-+        h->param.analyse.b_aq = 0;
-     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
- 
-     {
-Index: x264.c
-===================================================================
---- x264.c	(revision 669)
-+++ x264.c	(working copy)
-@@ -243,6 +243,12 @@
+Index: x264.c
+===================================================================
+--- x264.c	(revision 736)
++++ x264.c	(working copy)
+@@ -244,6 +244,14 @@
          "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
      H0( "      --no-fast-pskip         Disables early SKIP detection on P-frames\n" );
      H0( "      --no-dct-decimate       Disables coefficient thresholding on P-frames\n" );
-+    H0( "      --aq-strength <float>   Amount to adjust QP per MB [%.1f]\n"
++    H0( "      --aq-strength <float>   Amount to adjust QP/lambda per MB [%.1f]\n"
 +        "                                  0.0: no AQ\n"
-+        "                                  1.1: strong AQ\n", defaults->analyse.f_aq_strength );
-+    H0( "      --aq-sensitivity <float> \"Flatness\" threshold to trigger AQ [%.1f]\n"
-+        "                                    5: applies to almost all blocks\n"
-+        "                                   22: only flat blocks\n", defaults->analyse.f_aq_sensitivity );
++        "                                  1.0: medium AQ\n", defaults->analyse.f_aq_strength );
++    H0( "      --aq-sensitivity <float> \"Center\" of AQ curve. [%.1f]\n"
++        "               0: automatic sensitivity (avoids moving bits between frames)\n"
++        "               10: most QPs are raised\n"
++        "               20: good general-use sensitivity\n"
++        "               30: most QPs are lowered\n", defaults->analyse.f_aq_sensitivity );
      H0( "      --nr <integer>          Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
      H1( "\n" );
      H1( "      --deadzone-inter <int>  Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
-@@ -406,6 +412,8 @@
+@@ -407,6 +415,8 @@
              { "trellis", required_argument, NULL, 't' },
              { "no-fast-pskip", no_argument, NULL, 0 },
              { "no-dct-decimate", no_argument, NULL, 0 },
@@ -213,11 +244,50 @@ Index: x264.c
              { "deadzone-inter", required_argument, NULL, '0' },
              { "deadzone-intra", required_argument, NULL, '0' },
              { "level",   required_argument, NULL, 0 },
-Index: x264.h
-===================================================================
---- x264.h	(revision 669)
-+++ x264.h	(working copy)
-@@ -227,6 +227,9 @@
+Index: common/common.c
+===================================================================
+--- common/common.c	(revision 736)
++++ common/common.c	(working copy)
+@@ -123,6 +123,9 @@
+     param->analyse.i_chroma_qp_offset = 0;
+     param->analyse.b_fast_pskip = 1;
+     param->analyse.b_dct_decimate = 1;
++    param->analyse.b_aq = 1;
++    param->analyse.f_aq_strength = 0.5;
++    param->analyse.f_aq_sensitivity = 13;
+     param->analyse.i_luma_deadzone[0] = 21;
+     param->analyse.i_luma_deadzone[1] = 11;
+     param->analyse.b_psnr = 1;
+@@ -455,6 +458,13 @@
+         p->analyse.b_fast_pskip = atobool(value);
+     OPT("dct-decimate")
+         p->analyse.b_dct_decimate = atobool(value);
++    OPT("aq-strength")
++    {
++        p->analyse.f_aq_strength = atof(value);
++        p->analyse.b_aq = 1;
++    }
++    OPT("aq-sensitivity")
++        p->analyse.f_aq_sensitivity = atof(value);
+     OPT("deadzone-inter")
+         p->analyse.i_luma_deadzone[0] = atoi(value);
+     OPT("deadzone-intra")
+@@ -883,6 +893,10 @@
+         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
+         if( p->i_bframe )
+             s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
++        if( p->analyse.b_aq )
++            s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
++        else
++            s += sprintf( s, " aq=0" );
+         if( p->rc.psz_zones )
+             s += sprintf( s, " zones=%s", p->rc.psz_zones );
+         else if( p->rc.i_zones )
+Index: x264.h
+===================================================================
+--- x264.h	(revision 736)
++++ x264.h	(working copy)
+@@ -232,6 +232,9 @@
          int          i_trellis;  /* trellis RD quantization */
          int          b_fast_pskip; /* early SKIP detection on P-frames */
          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
author	jbrjake <[email protected]>	2008-03-07 15:28:52 +0000
committer	jbrjake <[email protected]>	2008-03-07 15:28:52 +0000
commit	8463a0b011665a2da7ef549f155d9f935971a94a (patch)
tree	c7f195d602f9831b65c7eca22a1a33837b823e21
parent	8cf6e82be22656e77e8c4a723b1507a187fa924b (diff)