summary | refs | log | tree | commit | diff | stats
path: root/contrib/patch-x264-aq.patch
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/patch-x264-aq.patch')
-rw-r--r-- contrib/patch-x264-aq.patch 430
1 files changed, 250 insertions, 180 deletions
diff --git a/contrib/patch-x264-aq.patch b/contrib/patch-x264-aq.patch
index ec624ee4d..79d8dca54 100644
--- a/contrib/patch-x264-aq.patch
+++ b/contrib/patch-x264-aq.patch
@@ -1,210 +1,241 @@
-Index: common/common.c
-===================================================================
---- common/common.c (revision 669)
-+++ common/common.c (working copy)
-@@ -123,6 +123,9 @@
- param->analyse.i_chroma_qp_offset = 0;
- param->analyse.b_fast_pskip = 1;
- param->analyse.b_dct_decimate = 1;
-+ param->analyse.b_aq = 0;
-+ param->analyse.f_aq_strength = 0.0;
-+ param->analyse.f_aq_sensitivity = 15;
- param->analyse.i_luma_deadzone[0] = 21;
- param->analyse.i_luma_deadzone[1] = 11;
- param->analyse.b_psnr = 1;
-@@ -455,6 +458,13 @@
- p->analyse.b_fast_pskip = atobool(value);
- OPT("dct-decimate")
- p->analyse.b_dct_decimate = atobool(value);
-+ OPT("aq-strength")
-+ {
-+ p->analyse.f_aq_strength = atof(value);
-+ p->analyse.b_aq = (p->analyse.f_aq_strength > 0.0);
-+ }
-+ OPT("aq-sensitivity")
-+ p->analyse.f_aq_sensitivity = atof(value);
- OPT("deadzone-inter")
- p->analyse.i_luma_deadzone[0] = atoi(value);
- OPT("deadzone-intra")
-@@ -939,6 +949,9 @@
- s += sprintf( s, " zones" );
- }
+Index: encoder/ratecontrol.h
+===================================================================
+--- encoder/ratecontrol.h (revision 736)
++++ encoder/ratecontrol.h (working copy)
+@@ -34,6 +34,7 @@
+ int x264_ratecontrol_qp( x264_t * );
+ void x264_ratecontrol_end( x264_t *, int bits );
+ void x264_ratecontrol_summary( x264_t * );
++void x264_adaptive_quant ( x264_t * );
-+ if( p->analyse.b_aq )
-+ s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
-+
- return buf;
- }
-
-Index: common/pixel.c
-===================================================================
---- common/pixel.c (revision 669)
-+++ common/pixel.c (working copy)
-@@ -213,6 +213,14 @@
- PIXEL_SATD_C( x264_pixel_satd_4x8, 4, 8 )
- PIXEL_SATD_C( x264_pixel_satd_4x4, 4, 4 )
-
-+static int x264_pixel_count_8x8( uint8_t *pix, int i_pix, uint32_t threshold )
-+{
-+ int x, y, sum = 0;
-+ for( y=0; y<8; y++, pix += i_pix )
-+ for( x=0; x<8; x++ )
-+ sum += pix[x] > (uint8_t)threshold;
-+ return sum;
-+}
+ #endif
- /****************************************************************************
- * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
-@@ -470,6 +478,8 @@
- pixf->ads[PIXEL_16x8] = pixel_ads2;
- pixf->ads[PIXEL_8x8] = pixel_ads1;
+Index: encoder/encoder.c
+===================================================================
+--- encoder/encoder.c (revision 736)
++++ encoder/encoder.c (working copy)
+@@ -401,6 +401,7 @@
+ h->param.analyse.b_fast_pskip = 0;
+ h->param.analyse.i_noise_reduction = 0;
+ h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
++ h->param.analyse.b_aq = 0;
+ }
+ if( h->param.rc.i_rc_method == X264_RC_CQP )
+ {
+@@ -475,6 +476,10 @@
+ if( !h->param.b_cabac )
+ h->param.analyse.i_trellis = 0;
+ h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
++ h->param.analyse.b_aq = h->param.analyse.b_aq && h->param.analyse.f_aq_strength > 0;
++ /* VAQ on static sensitivity mode effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
++ if(h->param.analyse.b_aq && h->param.analyse.f_aq_sensitivity != 0)
++ h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.analyse.f_aq_strength * 0.4 / 0.28, 0, 1);
+ h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
-+ pixf->count_8x8 = x264_pixel_count_8x8;
-+
- #ifdef HAVE_MMX
- if( cpu&X264_CPU_MMX )
{
-Index: common/pixel.h
-===================================================================
---- common/pixel.h (revision 669)
-+++ common/pixel.h (working copy)
-@@ -90,6 +90,8 @@
- void (*ads[7])( int enc_dc[4], uint16_t *sums, int delta,
- uint16_t *res, int width );
-
-+ int (*count_8x8)( uint8_t *pix, int i_pix, uint32_t threshold );
-+
- /* calculate satd of V, H, and DC modes.
- * may be NULL, in which case just use pred+satd instead. */
- void (*intra_satd_x3_16x16)( uint8_t *fenc, uint8_t *fdec, int res[3] );
-Index: encoder/analyse.c
-===================================================================
---- encoder/analyse.c (revision 669)
-+++ encoder/analyse.c (working copy)
-@@ -29,6 +29,7 @@
- #endif
+Index: encoder/ratecontrol.c
+===================================================================
+--- encoder/ratecontrol.c (revision 736)
++++ encoder/ratecontrol.c (working copy)
+@@ -127,6 +127,10 @@
+ predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
+ int bframes; /* # consecutive B-frames before this P-frame */
+ int bframe_bits; /* total cost of those frames */
++
++ /* AQ stuff */
++ float aq_threshold;
++ int *ac_energy;
- #include "common/common.h"
-+#include "common/cpu.h"
- #include "macroblock.h"
- #include "me.h"
- #include "ratecontrol.h"
-@@ -2029,8 +2030,68 @@
- }
+ int i_zones;
+ x264_zone_t *zones;
+@@ -169,7 +173,97 @@
+ + rce->misc_bits;
}
-+static int x264_sum_dctq( int16_t dct[8][8] )
++// Find the total AC energy of the block in all planes.
++static int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
+{
-+ int i, t = 0;
-+ int16_t *p = &dct[0][0];
-+ for( i=1; i<64; i++ )
-+ t += abs(p[i]) * x264_dct8_weight_tab[i];
-+ return t;
++ DECLARE_ALIGNED( static uint8_t, flat[16], 16 ) = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128};
++// DECLARE_ALIGNED( static uint8_t, flat[16], 16 );
++ unsigned int var=0, sad, ssd, i;
++ for( i=0; i<3; i++ )
++ {
++ int w = i ? 8 : 16;
++ int stride = h->fenc->i_stride[i];
++ int offset = h->mb.b_interlaced
++ ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
++ : w * (mb_x + mb_y * stride);
++ int pix = i ? PIXEL_8x8 : PIXEL_16x16;
++ stride <<= h->mb.b_interlaced;
++ sad = h->pixf.sad[pix](flat, 0, h->fenc->plane[i]+offset, stride);
++ ssd = h->pixf.ssd[pix](flat, 0, h->fenc->plane[i]+offset, stride);
++ var += ssd - (sad * sad >> (i?6:8));
++ // SATD to represent the block's overall complexity (bit cost) for intra encoding.
++ // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
++ if( var && satd )
++ *satd += h->pixf.satd[pix](flat, 0, h->fenc->plane[i]+offset, stride) - sad/2;
++ }
++ return var;
++}
++
++void x264_autosense_aq( x264_t *h )
++{
++ double total = 0;
++ double n = 0;
++ int mb_x, mb_y;
++ /* FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them? */
++ /* FIXME: Is chroma SATD necessary? */
++ for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
++ for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
++ {
++ int energy, satd=0;
++ energy = ac_energy_mb( h, mb_x, mb_y, &satd );
++ h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
++ /* Weight the energy value by the SATD value of the MB. This represents the fact that
++ the more complex blocks in a frame should be weighted more when calculating the optimal sensitivity.
++ This also helps diminish the negative effect of large numbers of simple blocks in a frame, such as in the case
++ of a letterboxed film. */
++ if( energy )
++ {
++ x264_cpu_restore(h->param.cpu);
++ total += logf(energy) * satd;
++ n += satd;
++ }
++ }
++ x264_cpu_restore(h->param.cpu);
++ /* Calculate and store the threshold. */
++ h->rc->aq_threshold = n ? total/n : 15;
+}
- /*****************************************************************************
-+ * x264_adaptive_quant:
-+ * check if mb is "flat", i.e. has most energy in low frequency components, and
-+ * adjust qp down if it is
-+ *****************************************************************************/
-+void x264_adaptive_quant( x264_t *h, x264_mb_analysis_t *a )
++/*****************************************************************************
++* x264_adaptive_quant:
++ * adjust macroblock QP based on variance (AC energy) of the MB.
++ * high variance = higher QP
++ * low variance = lower QP
++ * This generally increases SSIM and lowers PSNR.
++*****************************************************************************/
++void x264_adaptive_quant( x264_t *h )
+{
-+ DECLARE_ALIGNED( static uint8_t, zero[FDEC_STRIDE*8], 16 );
-+ DECLARE_ALIGNED( int16_t, dct[8][8], 16 );
-+ float fc;
-+ int total = 0;
-+ int qp = h->mb.i_qp, qp_adj;
-+ int i;
-+
-+ if( qp <= 10 ) /* AQ is probably not needed at such low QP */
-+ return;
-+
-+ if( h->pixf.sad[PIXEL_16x16](h->mb.pic.p_fenc[0], FENC_STRIDE, zero, 16) > 64*16*16 )
-+ { /* light places */
-+ if( h->pixf.count_8x8(h->mb.pic.p_fenc[1], FENC_STRIDE, 0x81818181) < 40 )
-+ /* not enough "blue" pixels */
-+ return;
-+
-+ if( h->pixf.count_8x8(h->mb.pic.p_fenc[2], FENC_STRIDE, 0x87878787) > 24 )
-+ /* too many "red" pixels */
-+ return;
++ int qp = h->mb.i_qp;
++ int energy;
++ x264_cpu_restore(h->param.cpu);
++ if(h->param.analyse.f_aq_sensitivity != 0)
++ energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
++ else
++ energy = h->rc->ac_energy[h->mb.i_mb_xy];
++ if(energy == 0)
++ {
++ h->mb.i_qp = h->mb.i_last_qp;
+ }
-+
-+ for( i=0; i<4; i++ )
++ else
+ {
-+ h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + (i&1)*8 + (i>>1)*FENC_STRIDE, zero );
-+ total += x264_sum_dctq( dct );
++ x264_cpu_restore(h->param.cpu);
++ float result = energy;
++ /* Adjust the QP based on the AC energy of the macroblock. */
++ float qp_adj = 3 * (logf(result) - h->rc->aq_threshold);
++ if(h->param.analyse.f_aq_sensitivity == 0) qp_adj = x264_clip3f(qp_adj, -5, 5);
++ int new_qp = x264_clip3(qp + qp_adj * h->param.analyse.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
++ /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
++ * to lower the bit cost of the qp_delta. */
++ if(abs(new_qp - h->mb.i_last_qp) == 1) new_qp = h->mb.i_last_qp;
++ h->mb.i_qp = new_qp;
+ }
-+
-+ if( total == 0 ) /* no AC coefficients, nothing to do */
-+ return;
-+
-+ x264_cpu_restore( h->param.cpu );
-+
-+ fc = expf(-5e-13 * total * total);
-+
-+ /* the function is chosen such that it stays close to 0 in almost all
-+ * range of 0..1, and rapidly goes up to 1 near 1.0 */
-+ qp_adj = (int)(qp * h->param.analyse.f_aq_strength / pow(2 - fc, h->param.analyse.f_aq_sensitivity));
-+
-+ /* don't adjust by more than this amount */
-+ qp_adj = X264_MIN(qp_adj, qp/2);
-+
-+ h->mb.i_qp = a->i_qp = qp - qp_adj;
+ h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
+}
+
-+/*****************************************************************************
- * x264_macroblock_analyse:
- *****************************************************************************/
- void x264_macroblock_analyse( x264_t *h )
-@@ -2038,9 +2099,14 @@
- x264_mb_analysis_t analysis;
+ int x264_ratecontrol_new( x264_t *h )
+ {
+ x264_ratecontrol_t *rc;
+@@ -244,7 +338,7 @@
+ rc->rate_tolerance = 0.01;
+ }
+
+- h->mb.b_variable_qp = rc->b_vbv && !rc->b_2pass;
++ h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.analyse.b_aq;
+
+ if( rc->b_abr )
+ {
+@@ -458,10 +552,13 @@
+ x264_free( p );
+ }
+
+- for( i=1; i<h->param.i_threads; i++ )
++ for( i=0; i<h->param.i_threads; i++ )
+ {
+ h->thread[i]->rc = rc+i;
+- rc[i] = rc[0];
++ if( i )
++ rc[i] = rc[0];
++ if( h->param.analyse.b_aq )
++ rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
+ }
+
+ return 0;
+@@ -623,6 +720,8 @@
+ x264_free( rc->zones[i].param );
+ x264_free( rc->zones );
+ }
++ for( i=0; i<h->param.i_threads; i++ )
++ x264_free( rc[i].ac_energy );
+ x264_free( rc );
+ }
+
+@@ -729,6 +828,15 @@
+
+ if( h->sh.i_type != SLICE_TYPE_B )
+ rc->last_non_b_pict_type = h->sh.i_type;
++
++ /* Adaptive AQ sensitivity algorithm. */
++ if( h->param.analyse.b_aq )
++ {
++ if( h->param.analyse.f_aq_sensitivity > 0 )
++ h->rc->aq_threshold = logf(powf(h->param.analyse.f_aq_sensitivity,4)/2); //FIXME simplify
++ else
++ x264_autosense_aq(h);
++ }
+ }
+
+ double predict_row_size( x264_t *h, int y, int qp )
+Index: encoder/analyse.c
+===================================================================
+--- encoder/analyse.c (revision 736)
++++ encoder/analyse.c (working copy)
+@@ -2047,8 +2047,13 @@
int i_cost = COST_MAX;
int i;
-+
-+ h->mb.i_qp = x264_ratecontrol_qp( h );
-+ if( h->param.analyse.b_aq )
-+ x264_adaptive_quant( h, &analysis );
-+
- /* init analysis */
+- /* init analysis */
- x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
++ h->mb.i_qp = x264_ratecontrol_qp( h );
++
++ if( h->param.analyse.b_aq )
++ x264_adaptive_quant( h );
++
++ /* init analysis */
+ x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
/*--------------------------- Do the analysis ---------------------------*/
if( h->sh.i_type == SLICE_TYPE_I )
-Index: encoder/encoder.c
-===================================================================
---- encoder/encoder.c (revision 669)
-+++ encoder/encoder.c (working copy)
-@@ -477,6 +477,8 @@
- if( !h->param.b_cabac )
- h->param.analyse.i_trellis = 0;
- h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
-+ if( h->param.analyse.b_aq && h->param.analyse.f_aq_strength <= 0 )
-+ h->param.analyse.b_aq = 0;
- h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
-
- {
-Index: x264.c
-===================================================================
---- x264.c (revision 669)
-+++ x264.c (working copy)
-@@ -243,6 +243,12 @@
+Index: x264.c
+===================================================================
+--- x264.c (revision 736)
++++ x264.c (working copy)
+@@ -244,6 +244,14 @@
" - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
H0( " --no-fast-pskip Disables early SKIP detection on P-frames\n" );
H0( " --no-dct-decimate Disables coefficient thresholding on P-frames\n" );
-+ H0( " --aq-strength <float> Amount to adjust QP per MB [%.1f]\n"
++ H0( " --aq-strength <float> Amount to adjust QP/lambda per MB [%.1f]\n"
+ " 0.0: no AQ\n"
-+ " 1.1: strong AQ\n", defaults->analyse.f_aq_strength );
-+ H0( " --aq-sensitivity <float> \"Flatness\" threshold to trigger AQ [%.1f]\n"
-+ " 5: applies to almost all blocks\n"
-+ " 22: only flat blocks\n", defaults->analyse.f_aq_sensitivity );
++ " 1.0: medium AQ\n", defaults->analyse.f_aq_strength );
++ H0( " --aq-sensitivity <float> \"Center\" of AQ curve. [%.1f]\n"
++ " 0: automatic sensitivity (avoids moving bits between frames)\n"
++ " 10: most QPs are raised\n"
++ " 20: good general-use sensitivity\n"
++ " 30: most QPs are lowered\n", defaults->analyse.f_aq_sensitivity );
H0( " --nr <integer> Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
H1( "\n" );
H1( " --deadzone-inter <int> Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
-@@ -406,6 +412,8 @@
+@@ -407,6 +415,8 @@
{ "trellis", required_argument, NULL, 't' },
{ "no-fast-pskip", no_argument, NULL, 0 },
{ "no-dct-decimate", no_argument, NULL, 0 },
@@ -213,11 +244,50 @@ Index: x264.c
{ "deadzone-inter", required_argument, NULL, '0' },
{ "deadzone-intra", required_argument, NULL, '0' },
{ "level", required_argument, NULL, 0 },
-Index: x264.h
-===================================================================
---- x264.h (revision 669)
-+++ x264.h (working copy)
-@@ -227,6 +227,9 @@
+Index: common/common.c
+===================================================================
+--- common/common.c (revision 736)
++++ common/common.c (working copy)
+@@ -123,6 +123,9 @@
+ param->analyse.i_chroma_qp_offset = 0;
+ param->analyse.b_fast_pskip = 1;
+ param->analyse.b_dct_decimate = 1;
++ param->analyse.b_aq = 1;
++ param->analyse.f_aq_strength = 0.5;
++ param->analyse.f_aq_sensitivity = 13;
+ param->analyse.i_luma_deadzone[0] = 21;
+ param->analyse.i_luma_deadzone[1] = 11;
+ param->analyse.b_psnr = 1;
+@@ -455,6 +458,13 @@
+ p->analyse.b_fast_pskip = atobool(value);
+ OPT("dct-decimate")
+ p->analyse.b_dct_decimate = atobool(value);
++ OPT("aq-strength")
++ {
++ p->analyse.f_aq_strength = atof(value);
++ p->analyse.b_aq = 1;
++ }
++ OPT("aq-sensitivity")
++ p->analyse.f_aq_sensitivity = atof(value);
+ OPT("deadzone-inter")
+ p->analyse.i_luma_deadzone[0] = atoi(value);
+ OPT("deadzone-intra")
+@@ -883,6 +893,10 @@
+ s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
+ if( p->i_bframe )
+ s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
++ if( p->analyse.b_aq )
++ s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
++ else
++ s += sprintf( s, " aq=0" );
+ if( p->rc.psz_zones )
+ s += sprintf( s, " zones=%s", p->rc.psz_zones );
+ else if( p->rc.i_zones )
+Index: x264.h
+===================================================================
+--- x264.h (revision 736)
++++ x264.h (working copy)
+@@ -232,6 +232,9 @@
int i_trellis; /* trellis RD quantization */
int b_fast_pskip; /* early SKIP detection on P-frames */
int b_dct_decimate; /* transform coefficient thresholding on P-frames */