diff options
Diffstat (limited to 'contrib/patch-x264-aq.patch')
-rw-r--r-- | contrib/patch-x264-aq.patch | 430 |
1 files changed, 250 insertions, 180 deletions
diff --git a/contrib/patch-x264-aq.patch b/contrib/patch-x264-aq.patch index ec624ee4d..79d8dca54 100644 --- a/contrib/patch-x264-aq.patch +++ b/contrib/patch-x264-aq.patch @@ -1,210 +1,241 @@ -Index: common/common.c
-===================================================================
---- common/common.c (revision 669)
-+++ common/common.c (working copy)
-@@ -123,6 +123,9 @@
- param->analyse.i_chroma_qp_offset = 0; - param->analyse.b_fast_pskip = 1; - param->analyse.b_dct_decimate = 1; -+ param->analyse.b_aq = 0; -+ param->analyse.f_aq_strength = 0.0; -+ param->analyse.f_aq_sensitivity = 15; - param->analyse.i_luma_deadzone[0] = 21; - param->analyse.i_luma_deadzone[1] = 11; - param->analyse.b_psnr = 1; -@@ -455,6 +458,13 @@
- p->analyse.b_fast_pskip = atobool(value); - OPT("dct-decimate") - p->analyse.b_dct_decimate = atobool(value); -+ OPT("aq-strength") -+ { -+ p->analyse.f_aq_strength = atof(value); -+ p->analyse.b_aq = (p->analyse.f_aq_strength > 0.0); -+ } -+ OPT("aq-sensitivity") -+ p->analyse.f_aq_sensitivity = atof(value); - OPT("deadzone-inter") - p->analyse.i_luma_deadzone[0] = atoi(value); - OPT("deadzone-intra") -@@ -939,6 +949,9 @@
- s += sprintf( s, " zones" ); - } +Index: encoder/ratecontrol.h +=================================================================== +--- encoder/ratecontrol.h (revision 736) ++++ encoder/ratecontrol.h (working copy) +@@ -34,6 +34,7 @@ + int x264_ratecontrol_qp( x264_t * ); + void x264_ratecontrol_end( x264_t *, int bits ); + void x264_ratecontrol_summary( x264_t * ); ++void x264_adaptive_quant ( x264_t * ); -+ if( p->analyse.b_aq ) -+ s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity ); -+ - return buf; - } - -Index: common/pixel.c
-===================================================================
---- common/pixel.c (revision 669)
-+++ common/pixel.c (working copy)
-@@ -213,6 +213,14 @@
- PIXEL_SATD_C( x264_pixel_satd_4x8, 4, 8 ) - PIXEL_SATD_C( x264_pixel_satd_4x4, 4, 4 ) - -+static int x264_pixel_count_8x8( uint8_t *pix, int i_pix, uint32_t threshold ) -+{ -+ int x, y, sum = 0; -+ for( y=0; y<8; y++, pix += i_pix ) -+ for( x=0; x<8; x++ ) -+ sum += pix[x] > (uint8_t)threshold; -+ return sum; -+} + #endif - /**************************************************************************** - * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences -@@ -470,6 +478,8 @@
- pixf->ads[PIXEL_16x8] = pixel_ads2; - pixf->ads[PIXEL_8x8] = pixel_ads1; +Index: encoder/encoder.c +=================================================================== +--- encoder/encoder.c (revision 736) ++++ encoder/encoder.c (working copy) +@@ -401,6 +401,7 @@ + h->param.analyse.b_fast_pskip = 0; + h->param.analyse.i_noise_reduction = 0; + h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 ); ++ h->param.analyse.b_aq = 0; + } + if( h->param.rc.i_rc_method == X264_RC_CQP ) + { +@@ -475,6 +476,10 @@ + if( !h->param.b_cabac ) + h->param.analyse.i_trellis = 0; + h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 ); ++ h->param.analyse.b_aq = h->param.analyse.b_aq && h->param.analyse.f_aq_strength > 0; ++ /* VAQ on static sensitivity mode effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */ ++ if(h->param.analyse.b_aq && h->param.analyse.f_aq_sensitivity != 0) ++ h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.analyse.f_aq_strength * 0.4 / 0.28, 0, 1); + h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 ); -+ pixf->count_8x8 = x264_pixel_count_8x8; -+ - #ifdef HAVE_MMX - if( cpu&X264_CPU_MMX ) { -Index: common/pixel.h
-===================================================================
---- common/pixel.h (revision 669)
-+++ common/pixel.h (working copy)
-@@ -90,6 +90,8 @@
- void (*ads[7])( int enc_dc[4], uint16_t *sums, int delta, - uint16_t *res, int width ); - -+ int (*count_8x8)( uint8_t *pix, int i_pix, uint32_t threshold ); -+ - /* calculate satd of V, H, and DC modes. - * may be NULL, in which case just use pred+satd instead. */ - void (*intra_satd_x3_16x16)( uint8_t *fenc, uint8_t *fdec, int res[3] ); -Index: encoder/analyse.c
-===================================================================
---- encoder/analyse.c (revision 669)
-+++ encoder/analyse.c (working copy)
-@@ -29,6 +29,7 @@
- #endif +Index: encoder/ratecontrol.c +=================================================================== +--- encoder/ratecontrol.c (revision 736) ++++ encoder/ratecontrol.c (working copy) +@@ -127,6 +127,10 @@ + predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */ + int bframes; /* # consecutive B-frames before this P-frame */ + int bframe_bits; /* total cost of those frames */ ++ ++ /* AQ stuff */ ++ float aq_threshold; ++ int *ac_energy; - #include "common/common.h" -+#include "common/cpu.h" - #include "macroblock.h" - #include "me.h" - #include "ratecontrol.h" -@@ -2029,8 +2030,68 @@
- } + int i_zones; + x264_zone_t *zones; +@@ -169,7 +173,97 @@ + + rce->misc_bits; } -+static int x264_sum_dctq( int16_t dct[8][8] ) ++// Find the total AC energy of the block in all planes. ++static int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd ) +{ -+ int i, t = 0; -+ int16_t *p = &dct[0][0]; -+ for( i=1; i<64; i++ ) -+ t += abs(p[i]) * x264_dct8_weight_tab[i]; -+ return t; ++ DECLARE_ALIGNED( static uint8_t, flat[16], 16 ) = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128}; ++// DECLARE_ALIGNED( static uint8_t, flat[16], 16 ); ++ unsigned int var=0, sad, ssd, i; ++ for( i=0; i<3; i++ ) ++ { ++ int w = i ? 8 : 16; ++ int stride = h->fenc->i_stride[i]; ++ int offset = h->mb.b_interlaced ++ ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride ++ : w * (mb_x + mb_y * stride); ++ int pix = i ? PIXEL_8x8 : PIXEL_16x16; ++ stride <<= h->mb.b_interlaced; ++ sad = h->pixf.sad[pix](flat, 0, h->fenc->plane[i]+offset, stride); ++ ssd = h->pixf.ssd[pix](flat, 0, h->fenc->plane[i]+offset, stride); ++ var += ssd - (sad * sad >> (i?6:8)); ++ // SATD to represent the block's overall complexity (bit cost) for intra encoding. ++ // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost. ++ if( var && satd ) ++ *satd += h->pixf.satd[pix](flat, 0, h->fenc->plane[i]+offset, stride) - sad/2; ++ } ++ return var; ++} ++ ++void x264_autosense_aq( x264_t *h ) ++{ ++ double total = 0; ++ double n = 0; ++ int mb_x, mb_y; ++ /* FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them? */ ++ /* FIXME: Is chroma SATD necessary? */ ++ for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ ) ++ for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ ) ++ { ++ int energy, satd=0; ++ energy = ac_energy_mb( h, mb_x, mb_y, &satd ); ++ h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy; ++ /* Weight the energy value by the SATD value of the MB. This represents the fact that ++ the more complex blocks in a frame should be weighted more when calculating the optimal sensitivity. ++ This also helps diminish the negative effect of large numbers of simple blocks in a frame, such as in the case ++ of a letterboxed film. */ ++ if( energy ) ++ { ++ x264_cpu_restore(h->param.cpu); ++ total += logf(energy) * satd; ++ n += satd; ++ } ++ } ++ x264_cpu_restore(h->param.cpu); ++ /* Calculate and store the threshold. */ ++ h->rc->aq_threshold = n ? total/n : 15; +} - /***************************************************************************** -+ * x264_adaptive_quant: -+ * check if mb is "flat", i.e. has most energy in low frequency components, and -+ * adjust qp down if it is -+ *****************************************************************************/ -+void x264_adaptive_quant( x264_t *h, x264_mb_analysis_t *a ) ++/***************************************************************************** ++* x264_adaptive_quant: ++ * adjust macroblock QP based on variance (AC energy) of the MB. ++ * high variance = higher QP ++ * low variance = lower QP ++ * This generally increases SSIM and lowers PSNR. ++*****************************************************************************/ ++void x264_adaptive_quant( x264_t *h ) +{ -+ DECLARE_ALIGNED( static uint8_t, zero[FDEC_STRIDE*8], 16 ); -+ DECLARE_ALIGNED( int16_t, dct[8][8], 16 ); -+ float fc; -+ int total = 0; -+ int qp = h->mb.i_qp, qp_adj; -+ int i; -+ -+ if( qp <= 10 ) /* AQ is probably not needed at such low QP */ -+ return; -+ -+ if( h->pixf.sad[PIXEL_16x16](h->mb.pic.p_fenc[0], FENC_STRIDE, zero, 16) > 64*16*16 ) -+ { /* light places */ -+ if( h->pixf.count_8x8(h->mb.pic.p_fenc[1], FENC_STRIDE, 0x81818181) < 40 ) -+ /* not enough "blue" pixels */ -+ return; -+ -+ if( h->pixf.count_8x8(h->mb.pic.p_fenc[2], FENC_STRIDE, 0x87878787) > 24 ) -+ /* too many "red" pixels */ -+ return; ++ int qp = h->mb.i_qp; ++ int energy; ++ x264_cpu_restore(h->param.cpu); ++ if(h->param.analyse.f_aq_sensitivity != 0) ++ energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL ); ++ else ++ energy = h->rc->ac_energy[h->mb.i_mb_xy]; ++ if(energy == 0) ++ { ++ h->mb.i_qp = h->mb.i_last_qp; + } -+ -+ for( i=0; i<4; i++ ) ++ else + { -+ h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + (i&1)*8 + (i>>1)*FENC_STRIDE, zero ); -+ total += x264_sum_dctq( dct ); ++ x264_cpu_restore(h->param.cpu); ++ float result = energy; ++ /* Adjust the QP based on the AC energy of the macroblock. */ ++ float qp_adj = 3 * (logf(result) - h->rc->aq_threshold); ++ if(h->param.analyse.f_aq_sensitivity == 0) qp_adj = x264_clip3f(qp_adj, -5, 5); ++ int new_qp = x264_clip3(qp + qp_adj * h->param.analyse.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max); ++ /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB, ++ * to lower the bit cost of the qp_delta. */ ++ if(abs(new_qp - h->mb.i_last_qp) == 1) new_qp = h->mb.i_last_qp; ++ h->mb.i_qp = new_qp; + } -+ -+ if( total == 0 ) /* no AC coefficients, nothing to do */ -+ return; -+ -+ x264_cpu_restore( h->param.cpu ); -+ -+ fc = expf(-5e-13 * total * total); -+ -+ /* the function is chosen such that it stays close to 0 in almost all -+ * range of 0..1, and rapidly goes up to 1 near 1.0 */ -+ qp_adj = (int)(qp * h->param.analyse.f_aq_strength / pow(2 - fc, h->param.analyse.f_aq_sensitivity)); -+ -+ /* don't adjust by more than this amount */ -+ qp_adj = X264_MIN(qp_adj, qp/2); -+ -+ h->mb.i_qp = a->i_qp = qp - qp_adj; + h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )]; +} + -+/***************************************************************************** - * x264_macroblock_analyse: - *****************************************************************************/ - void x264_macroblock_analyse( x264_t *h ) -@@ -2038,9 +2099,14 @@
- x264_mb_analysis_t analysis; + int x264_ratecontrol_new( x264_t *h ) + { + x264_ratecontrol_t *rc; +@@ -244,7 +338,7 @@ + rc->rate_tolerance = 0.01; + } + +- h->mb.b_variable_qp = rc->b_vbv && !rc->b_2pass; ++ h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.analyse.b_aq; + + if( rc->b_abr ) + { +@@ -458,10 +552,13 @@ + x264_free( p ); + } + +- for( i=1; i<h->param.i_threads; i++ ) ++ for( i=0; i<h->param.i_threads; i++ ) + { + h->thread[i]->rc = rc+i; +- rc[i] = rc[0]; ++ if( i ) ++ rc[i] = rc[0]; ++ if( h->param.analyse.b_aq ) ++ rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) ); + } + + return 0; +@@ -623,6 +720,8 @@ + x264_free( rc->zones[i].param ); + x264_free( rc->zones ); + } ++ for( i=0; i<h->param.i_threads; i++ ) ++ x264_free( rc[i].ac_energy ); + x264_free( rc ); + } + +@@ -729,6 +828,15 @@ + + if( h->sh.i_type != SLICE_TYPE_B ) + rc->last_non_b_pict_type = h->sh.i_type; ++ ++ /* Adaptive AQ sensitivity algorithm. */ ++ if( h->param.analyse.b_aq ) ++ { ++ if( h->param.analyse.f_aq_sensitivity > 0 ) ++ h->rc->aq_threshold = logf(powf(h->param.analyse.f_aq_sensitivity,4)/2); //FIXME simplify ++ else ++ x264_autosense_aq(h); ++ } + } + + double predict_row_size( x264_t *h, int y, int qp ) +Index: encoder/analyse.c +=================================================================== +--- encoder/analyse.c (revision 736) ++++ encoder/analyse.c (working copy) +@@ -2047,8 +2047,13 @@ int i_cost = COST_MAX; int i; -+ -+ h->mb.i_qp = x264_ratecontrol_qp( h ); -+ if( h->param.analyse.b_aq ) -+ x264_adaptive_quant( h, &analysis ); -+ - /* init analysis */ +- /* init analysis */ - x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) ); ++ h->mb.i_qp = x264_ratecontrol_qp( h ); ++ ++ if( h->param.analyse.b_aq ) ++ x264_adaptive_quant( h ); ++ ++ /* init analysis */ + x264_mb_analyse_init( h, &analysis, h->mb.i_qp ); /*--------------------------- Do the analysis ---------------------------*/ if( h->sh.i_type == SLICE_TYPE_I ) -Index: encoder/encoder.c
-===================================================================
---- encoder/encoder.c (revision 669)
-+++ encoder/encoder.c (working copy)
-@@ -477,6 +477,8 @@
- if( !h->param.b_cabac ) - h->param.analyse.i_trellis = 0; - h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 ); -+ if( h->param.analyse.b_aq && h->param.analyse.f_aq_strength <= 0 ) -+ h->param.analyse.b_aq = 0; - h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 ); - - { -Index: x264.c
-===================================================================
---- x264.c (revision 669)
-+++ x264.c (working copy)
-@@ -243,6 +243,12 @@
+Index: x264.c +=================================================================== +--- x264.c (revision 736) ++++ x264.c (working copy) +@@ -244,6 +244,14 @@ " - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis ); H0( " --no-fast-pskip Disables early SKIP detection on P-frames\n" ); H0( " --no-dct-decimate Disables coefficient thresholding on P-frames\n" ); -+ H0( " --aq-strength <float> Amount to adjust QP per MB [%.1f]\n" ++ H0( " --aq-strength <float> Amount to adjust QP/lambda per MB [%.1f]\n" + " 0.0: no AQ\n" -+ " 1.1: strong AQ\n", defaults->analyse.f_aq_strength ); -+ H0( " --aq-sensitivity <float> \"Flatness\" threshold to trigger AQ [%.1f]\n" -+ " 5: applies to almost all blocks\n" -+ " 22: only flat blocks\n", defaults->analyse.f_aq_sensitivity ); ++ " 1.0: medium AQ\n", defaults->analyse.f_aq_strength ); ++ H0( " --aq-sensitivity <float> \"Center\" of AQ curve. [%.1f]\n" ++ " 0: automatic sensitivity (avoids moving bits between frames)\n" ++ " 10: most QPs are raised\n" ++ " 20: good general-use sensitivity\n" ++ " 30: most QPs are lowered\n", defaults->analyse.f_aq_sensitivity ); H0( " --nr <integer> Noise reduction [%d]\n", defaults->analyse.i_noise_reduction ); H1( "\n" ); H1( " --deadzone-inter <int> Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] ); -@@ -406,6 +412,8 @@
+@@ -407,6 +415,8 @@ { "trellis", required_argument, NULL, 't' }, { "no-fast-pskip", no_argument, NULL, 0 }, { "no-dct-decimate", no_argument, NULL, 0 }, @@ -213,11 +244,50 @@ Index: x264.c { "deadzone-inter", required_argument, NULL, '0' }, { "deadzone-intra", required_argument, NULL, '0' }, { "level", required_argument, NULL, 0 }, -Index: x264.h
-===================================================================
---- x264.h (revision 669)
-+++ x264.h (working copy)
-@@ -227,6 +227,9 @@
+Index: common/common.c +=================================================================== +--- common/common.c (revision 736) ++++ common/common.c (working copy) +@@ -123,6 +123,9 @@ + param->analyse.i_chroma_qp_offset = 0; + param->analyse.b_fast_pskip = 1; + param->analyse.b_dct_decimate = 1; ++ param->analyse.b_aq = 1; ++ param->analyse.f_aq_strength = 0.5; ++ param->analyse.f_aq_sensitivity = 13; + param->analyse.i_luma_deadzone[0] = 21; + param->analyse.i_luma_deadzone[1] = 11; + param->analyse.b_psnr = 1; +@@ -455,6 +458,13 @@ + p->analyse.b_fast_pskip = atobool(value); + OPT("dct-decimate") + p->analyse.b_dct_decimate = atobool(value); ++ OPT("aq-strength") ++ { ++ p->analyse.f_aq_strength = atof(value); ++ p->analyse.b_aq = 1; ++ } ++ OPT("aq-sensitivity") ++ p->analyse.f_aq_sensitivity = atof(value); + OPT("deadzone-inter") + p->analyse.i_luma_deadzone[0] = atoi(value); + OPT("deadzone-intra") +@@ -883,6 +893,10 @@ + s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor ); + if( p->i_bframe ) + s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor ); ++ if( p->analyse.b_aq ) ++ s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity ); ++ else ++ s += sprintf( s, " aq=0" ); + if( p->rc.psz_zones ) + s += sprintf( s, " zones=%s", p->rc.psz_zones ); + else if( p->rc.i_zones ) +Index: x264.h +=================================================================== +--- x264.h (revision 736) ++++ x264.h (working copy) +@@ -232,6 +232,9 @@ int i_trellis; /* trellis RD quantization */ int b_fast_pskip; /* early SKIP detection on P-frames */ int b_dct_decimate; /* transform coefficient thresholding on P-frames */ |