diff options
Diffstat (limited to 'libhb')
-rw-r--r-- | libhb/decomb.c | 547 | ||||
-rw-r--r-- | libhb/eedi2.c | 1870 | ||||
-rw-r--r-- | libhb/eedi2.h | 84 |
3 files changed, 2432 insertions, 69 deletions
diff --git a/libhb/decomb.c b/libhb/decomb.c index 4c0f42c17..bcf215f16 100644 --- a/libhb/decomb.c +++ b/libhb/decomb.c @@ -4,10 +4,15 @@ Homepage: <http://handbrake.fr/>. It may be used under the terms of the GNU General Public License. - The yadif algorithm was created by Michael Niedermayer. */ + The yadif algorithm was created by Michael Niedermayer. + Tritical's work inspired much of the comb detection code: + http://web.missouri.edu/~kes25c/ +*/ + #include "hb.h" #include "hbffmpeg.h" #include "mpeg2dec/mpeg2.h" +#include "eedi2.h" #define SUPPRESS_AV_LOG @@ -21,17 +26,52 @@ #define MIN3(a,b,c) MIN(MIN(a,b),c) #define MAX3(a,b,c) MAX(MAX(a,b),c) -typedef struct yadif_arguments_s { +// Some names to correspond to the pv->eedi_half array's contents +#define SRCPF 0 +#define MSKPF 1 +#define TMPPF 2 +#define DSTPF 3 +// Some names to correspond to the pv->eedi_full array's contents +#define DST2PF 0 +#define TMP2PF2 1 +#define MSK2PF 2 +#define TMP2PF 3 +#define DST2MPF 4 + +struct yadif_arguments_s { uint8_t **dst; int parity; int tff; int stop; int is_combed; -} yadif_arguments_t; +}; + +struct decomb_arguments_s { + int stop; +}; -typedef struct decomb_arguments_s { +struct eedi2_arguments_s { int stop; -} decomb_arguments_t; +}; + +typedef struct yadif_arguments_s yadif_arguments_t; +typedef struct decomb_arguments_s decomb_arguments_t; +typedef struct eedi2_arguments_s eedi2_arguments_t; + +typedef struct eedi2_thread_arg_s { + hb_filter_private_t *pv; + int plane; +} eedi2_thread_arg_t; + +typedef struct decomb_thread_arg_s { + hb_filter_private_t *pv; + int segment; +} decomb_thread_arg_t; + +typedef struct yadif_thread_arg_s { + hb_filter_private_t *pv; + int segment; +} yadif_thread_arg_t; struct hb_filter_private_s { @@ -39,6 +79,7 @@ struct hb_filter_private_s int width[3]; int height[3]; + // Decomb parameters int mode; int spatial_metric; int motion_threshold; @@ -46,8 +87,19 @@ struct hb_filter_private_s int block_threshold; int block_width; int 
block_height; + + // EEDI2 parameters + int magnitude_threshold; + int variance_threshold; + int laplacian_threshold; + int dilation_threshold; + int erosion_threshold; + int noise_threshold; + int maximum_search_distance; + int post_processing; int parity; + int tff; int yadif_ready; @@ -70,6 +122,13 @@ struct hb_filter_private_s /* Make a buffer to store a comb mask. */ uint8_t * mask[3]; + uint8_t * eedi_half[4][3]; + uint8_t * eedi_full[5][3]; + int * cx2; + int * cy2; + int * cxy; + int * tmpc; + AVPicture pic_in; AVPicture pic_out; hb_buffer_t * buf_out[2]; @@ -86,6 +145,11 @@ struct hb_filter_private_s hb_lock_t ** decomb_begin_lock; // Thread has work hb_lock_t ** decomb_complete_lock; // Thread has completed work decomb_arguments_t *decomb_arguments; // Arguments to thread for work + + hb_thread_t ** eedi2_threads; // Threads for eedi2 - one per plane + hb_lock_t ** eedi2_begin_lock; // Thread has work + hb_lock_t ** eedi2_complete_lock; // Thread has completed work + eedi2_arguments_t *eedi2_arguments; // Arguments to thread for work }; @@ -106,7 +170,7 @@ void hb_decomb_close( hb_filter_private_t * pv ); hb_filter_object_t hb_filter_decomb = { FILTER_DECOMB, - "Deinterlaces selectively with yadif/mcdeint and lowpass5 blending", + "Decomb", NULL, hb_decomb_init, hb_decomb_work, @@ -510,10 +574,171 @@ int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segm } } -typedef struct decomb_thread_arg_s { - hb_filter_private_t *pv; - int segment; -} decomb_thread_arg_t; +// This function calls all the eedi2 filters in sequence for a given plane. +// It outputs the final interpolated image to pv->eedi_full[DST2PF]. +void eedi2_interpolate_plane( hb_filter_private_t * pv, int k ) +{ + /* We need all these pointers. No, seriously. + I swear. It's not a joke. They're used. + All nine of them. 
*/ + uint8_t * mskp = pv->eedi_half[MSKPF][k]; + uint8_t * srcp = pv->eedi_half[SRCPF][k]; + uint8_t * tmpp = pv->eedi_half[TMPPF][k]; + uint8_t * dstp = pv->eedi_half[DSTPF][k]; + uint8_t * dst2p = pv->eedi_full[DST2PF][k]; + uint8_t * tmp2p2 = pv->eedi_full[TMP2PF2][k]; + uint8_t * msk2p = pv->eedi_full[MSK2PF][k]; + uint8_t * tmp2p = pv->eedi_full[TMP2PF][k]; + uint8_t * dst2mp = pv->eedi_full[DST2MPF][k]; + int * cx2 = pv->cx2; + int * cy2 = pv->cy2; + int * cxy = pv->cxy; + int * tmpc = pv->tmpc; + + int pitch = pv->ref_stride[k]; + int height = pv->height[k]; int width = pv->width[k]; + int half_height = height / 2; + + // edge mask + eedi2_build_edge_mask( mskp, pitch, srcp, pitch, + pv->magnitude_threshold, pv->variance_threshold, pv->laplacian_threshold, + half_height, width ); + eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width ); + eedi2_dilate_edge_mask( tmpp, pitch, mskp, pitch, pv->dilation_threshold, half_height, width ); + eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width ); + eedi2_remove_small_gaps( tmpp, pitch, mskp, pitch, half_height, width ); + + // direction mask + eedi2_calc_directions( k, mskp, pitch, srcp, pitch, tmpp, pitch, + pv->maximum_search_distance, pv->noise_threshold, + half_height, width ); + eedi2_filter_dir_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width ); + eedi2_expand_dir_map( mskp, pitch, dstp, pitch, tmpp, pitch, half_height, width ); + eedi2_filter_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width ); + + // upscale 2x vertically + eedi2_upscale_by_2( srcp, dst2p, half_height, pitch ); + eedi2_upscale_by_2( dstp, tmp2p2, half_height, pitch ); + eedi2_upscale_by_2( mskp, msk2p, half_height, pitch ); + + // upscale the direction mask + eedi2_mark_directions_2x( msk2p, pitch, tmp2p2, pitch, tmp2p, pitch, pv->tff, height, width ); + eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, 
width ); + eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width ); + eedi2_fill_gaps_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width ); + eedi2_fill_gaps_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width ); + + // interpolate a full-size plane + eedi2_interpolate_lattice( k, tmp2p, pitch, dst2p, pitch, tmp2p2, pitch, pv->tff, + pv->noise_threshold, height, width ); + + if( pv->post_processing == 1 || pv->post_processing == 3 ) + { + // make sure the edge directions are consistent + eedi2_bit_blit( tmp2p2, pitch, tmp2p, pitch, pv->width[k], pv->height[k] ); + eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width ); + eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width ); + eedi2_post_process( tmp2p, pitch, tmp2p2, pitch, dst2p, pitch, pv->tff, height, width ); + } + if( pv->post_processing == 2 || pv->post_processing == 3 ) + { + // filter junctions and corners + eedi2_gaussian_blur1( srcp, pitch, tmpp, pitch, srcp, pitch, half_height, width ); + eedi2_calc_derivatives( srcp, pitch, half_height, width, cx2, cy2, cxy ); + eedi2_gaussian_blur_sqrt2( cx2, tmpc, cx2, pitch, half_height, width); + eedi2_gaussian_blur_sqrt2( cy2, tmpc, cy2, pitch, half_height, width); + eedi2_gaussian_blur_sqrt2( cxy, tmpc, cxy, pitch, half_height, width); + eedi2_post_process_corner( cx2, cy2, cxy, pitch, tmp2p2, pitch, dst2p, pitch, height, width, pv->tff ); + } +} + +/* + * eedi2 interpolate this plane in a single thread. + */ +void eedi2_filter_thread( void *thread_args_v ) +{ + eedi2_arguments_t *eedi2_work = NULL; + hb_filter_private_t * pv; + int run = 1; + int plane; + eedi2_thread_arg_t *thread_args = thread_args_v; + + pv = thread_args->pv; + plane = thread_args->plane; + + hb_log("eedi2 thread started for plane %d", plane); + + while( run ) + { + /* + * Wait here until there is work to do. 
hb_lock() blocks until + * render releases it to say that there is more work to do. + */ + hb_lock( pv->eedi2_begin_lock[plane] ); + + eedi2_work = &pv->eedi2_arguments[plane]; + + if( eedi2_work->stop ) + { + /* + * No more work to do, exit this thread. + */ + run = 0; + continue; + } + + /* + * Process plane + */ + eedi2_interpolate_plane( pv, plane ); + + /* + * Finished this segment, let everyone know. + */ + hb_unlock( pv->eedi2_complete_lock[plane] ); + } + free( thread_args_v ); +} + +// Sets up the input field planes for EEDI2 in pv->eedi_half[SRCPF] +// and then runs eedi2_filter_thread for each plane. +void eedi2_planer( hb_filter_private_t * pv ) +{ + /* Copy the first field from the source to a half-height frame. */ + int i; + for( i = 0; i < 3; i++ ) + { + int pitch = pv->ref_stride[i]; + int start_line = !pv->tff; + eedi2_fill_half_height_buffer_plane( &pv->ref[1][i][pitch*start_line], pv->eedi_half[SRCPF][i], pitch, pv->height[i] ); + } + + int plane; + for( plane = 0; plane < 3; plane++ ) + { + /* + * Let the thread for this plane know that we've setup work + * for it by releasing the begin lock (ensuring that the + * complete lock is already locked so that we block when + * we try to lock it again below). + */ + hb_lock( pv->eedi2_complete_lock[plane] ); + hb_unlock( pv->eedi2_begin_lock[plane] ); + } + + /* + * Wait until all three threads have completed by trying to get + * the complete lock that we locked earlier for each thread, which + * will block until that thread has completed the work on that + * plane. + */ + for( plane = 0; plane < 3; plane++ ) + { + hb_lock( pv->eedi2_complete_lock[plane] ); + hb_unlock( pv->eedi2_complete_lock[plane] ); + } +} + /* * comb detect this segment of all three planes in a single thread. @@ -626,10 +851,15 @@ static void yadif_filter_line( uint8_t *dst, to the other field in the current frame--the one not being filtered. */ uint8_t *prev2 = parity ? prev : cur ; uint8_t *next2 = parity ? 
cur : next; + int w = pv->width[plane]; int refs = pv->ref_stride[plane]; int x; + int eedi2_mode = (pv->mode == 5); + /* We can replace spatial_pred with this interpolation*/ + uint8_t * eedi2_guess = &pv->eedi_full[DST2PF][plane][y*refs]; + /* Decomb's cubic interpolation can only function when there are three samples above and below, so regress to yadif's traditional two-tap interpolation when filtering at the top and bottom edges. */ @@ -654,60 +884,69 @@ static void yadif_filter_line( uint8_t *dst, int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1; /* For the actual difference, use the largest of the previous average diffs. */ int diff = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2); - - /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ - int spatial_score = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) + - ABS(cur[-refs+1] - cur[+refs+1]) - 1; + int spatial_pred; - - /* Spatial pred is either a bilinear or cubic vertical interpolation. */ - if( pv->mode > 0 && !edge) + + if( eedi2_mode ) { - spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] ); + /* Who needs yadif's spatial predictions when we can have EEDI2's? */ + spatial_pred = eedi2_guess[0]; + eedi2_guess++; } - else + else // Yadif spatial interpolation { - spatial_pred = (c+e)>>1; + /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ + int spatial_score = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) + + ABS(cur[-refs+1] - cur[+refs+1]) - 1; + + /* Spatial pred is either a bilinear or cubic vertical interpolation. 
*/ + if( pv->mode > 0 && !edge) + { + spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] ); + } + else + { + spatial_pred = (c+e)>>1; + } + + /* EDDI: Edge Directed Deinterlacing Interpolation + Checks 4 different slopes to see if there is more similarity along a diagonal + than there was vertically. If a diagonal is more similar, then it indicates + an edge, so interpolate along that instead of a vertical line, using either + linear or cubic interpolation depending on mode. */ + #define YADIF_CHECK(j)\ + { int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\ + + ABS(cur[-refs +j] - cur[+refs -j])\ + + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\ + if( score < spatial_score ){\ + spatial_score = score;\ + if( pv->mode > 0 && !edge )\ + {\ + switch(j)\ + {\ + case -1:\ + spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\ + break;\ + case -2:\ + spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\ + break;\ + case 1:\ + spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\ + break;\ + case 2:\ + spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\ + break;\ + }\ + }\ + else\ + {\ + spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\ + }\ + + YADIF_CHECK(-1) YADIF_CHECK(-2) }} }} + YADIF_CHECK( 1) YADIF_CHECK( 2) }} }} } -/* EDDI: Edge Directed Deinterlacing Interpolation - Uses the Martinez-Lim Line Shift Parametric Modeling algorithm...I think. - Checks 4 different slopes to see if there is more similarity along a diagonal - than there was vertically. If a diagonal is more similar, then it indicates - an edge, so interpolate along that instead of a vertical line, using either - linear or cubic interpolation depending on mode. 
*/ -#define YADIF_CHECK(j)\ - { int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\ - + ABS(cur[-refs +j] - cur[+refs -j])\ - + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\ - if( score < spatial_score ){\ - spatial_score = score;\ - if( pv->mode > 0 && !edge )\ - {\ - switch(j)\ - {\ - case -1:\ - spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\ - break;\ - case -2:\ - spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\ - break;\ - case 1:\ - spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\ - break;\ - case 2:\ - spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\ - break;\ - }\ - }\ - else\ - {\ - spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\ - }\ - - YADIF_CHECK(-1) YADIF_CHECK(-2) }} }} - YADIF_CHECK( 1) YADIF_CHECK( 2) }} }} - /* Temporally adjust the spatial prediction by comparing against lines in the adjacent fields. */ int b = (prev2[-2*refs] + next2[-2*refs])>>1; @@ -738,11 +977,6 @@ static void yadif_filter_line( uint8_t *dst, } } -typedef struct yadif_thread_arg_s { - hb_filter_private_t *pv; - int segment; -} yadif_thread_arg_t; - /* * deinterlace this segment of all three planes in a single thread. */ @@ -902,9 +1136,9 @@ static void yadif_filter( uint8_t ** dst, int tff, hb_filter_private_t * pv ) { - - int is_combed = comb_segmenter( pv ); - + /* If we're running comb detection, do it now, otherwise blend if mode 4 and interpolate if not. */ + int is_combed = pv->spatial_metric >= 0 ? comb_segmenter( pv ) : pv->mode == 4 ? 
2 : 1; + if( is_combed == 1 ) { pv->yadif_deinterlaced_frames++; @@ -918,6 +1152,12 @@ static void yadif_filter( uint8_t ** dst, pv->unfiltered_frames++; } + if( is_combed == 1 && pv->mode == 5 ) + { + /* Generate an EEDI2 interpolation */ + eedi2_planer( pv ); + } + if( is_combed ) { int segment; @@ -1131,6 +1371,15 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt, pv->block_width = 16; pv->block_height = 16; + pv->magnitude_threshold = 10; + pv->variance_threshold = 20; + pv->laplacian_threshold = 20; + pv->dilation_threshold = 4; + pv->erosion_threshold = 2; + pv->noise_threshold = 50; + pv->maximum_search_distance = 24; + pv->post_processing = 1; + pv->parity = PARITY_DEFAULT; pv->mcdeint_mode = MCDEINT_MODE_DEFAULT; @@ -1138,14 +1387,22 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt, if( settings ) { - sscanf( settings, "%d:%d:%d:%d:%d:%d:%d", + sscanf( settings, "%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d", &pv->mode, &pv->spatial_metric, &pv->motion_threshold, &pv->spatial_threshold, &pv->block_threshold, &pv->block_width, - &pv->block_height ); + &pv->block_height, + &pv->magnitude_threshold, + &pv->variance_threshold, + &pv->laplacian_threshold, + &pv->dilation_threshold, + &pv->erosion_threshold, + &pv->noise_threshold, + &pv->maximum_search_distance, + &pv->post_processing ); } pv->cpu_count = hb_get_cpu_count(); @@ -1181,7 +1438,38 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt, pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w; } + + if( pv->mode == 5 ) + { + /* Allocate half-height eedi2 buffers */ + height = pv->height[0] / 2; + for( i = 0; i < 3; i++ ) + { + int is_chroma = !!i; + int w = ((width + 31) & (~31))>>is_chroma; + int h = ((height+6+ 31) & (~31))>>is_chroma; + for( j = 0; j < 4; j++ ) + { + pv->eedi_half[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w; + } + } + + /* Allocate full-height eedi2 buffers */ + height = pv->height[0]; + for( i = 0; i < 3; i++ ) + { + int is_chroma = !!i; + int w = ((width + 31) & 
(~31))>>is_chroma; + int h = ((height+6+ 31) & (~31))>>is_chroma; + + for( j = 0; j < 5; j++ ) + { + pv->eedi_full[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w; + } + } + } + /* * Create yadif threads and locks. */ @@ -1264,7 +1552,62 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt, hb_error( "decomb could not create threads" ); } } + + if( pv->mode == 5 ) + { + /* + * Create eedi2 threads and locks. + */ + pv->eedi2_threads = malloc( sizeof( hb_thread_t* ) * 3 ); + pv->eedi2_begin_lock = malloc( sizeof( hb_lock_t * ) * 3 ); + pv->eedi2_complete_lock = malloc( sizeof( hb_lock_t * ) * 3 ); + pv->eedi2_arguments = malloc( sizeof( eedi2_arguments_t ) * 3 ); + + if( pv->post_processing > 1 ) + { + pv->cx2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16); + pv->cy2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16); + pv->cxy = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16); + pv->tmpc = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16); + if( !pv->cx2 || !pv->cy2 || !pv->cxy || !pv->tmpc ) + hb_log("EEDI2: failed to malloc derivative arrays"); + else + hb_log("EEDI2: successfully mallloced derivative arrays"); + } + + for( i = 0; i < 3; i++ ) + { + eedi2_thread_arg_t *eedi2_thread_args; + + eedi2_thread_args = malloc( sizeof( eedi2_thread_arg_t ) ); + + if( eedi2_thread_args ) + { + eedi2_thread_args->pv = pv; + eedi2_thread_args->plane = i; + + pv->eedi2_begin_lock[i] = hb_lock_init(); + pv->eedi2_complete_lock[i] = hb_lock_init(); + + /* + * Important to start off with the threads locked waiting + * on input. 
+ */ + hb_lock( pv->eedi2_begin_lock[i] ); + pv->eedi2_arguments[i].stop = 0; + + pv->eedi2_threads[i] = hb_thread_init( "eedi2_filter_segment", + eedi2_filter_thread, + eedi2_thread_args, + HB_NORMAL_PRIORITY ); + } + else + { + hb_error( "eedi2 could not create threads" ); + } + } + } /* Allocate mcdeint specific buffers */ @@ -1327,7 +1670,7 @@ void hb_decomb_close( hb_filter_private_t * pv ) return; } - hb_log("decomb: yadif deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames); + hb_log("decomb: %s deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->mode == 5 ? "yadif+eedi2" : "yadif", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames); /* Cleanup frame buffers */ if( pv->buf_out[0] ) @@ -1366,6 +1709,46 @@ void hb_decomb_close( hb_filter_private_t * pv ) } } + if( pv->mode == 5 ) + { + /* Cleanup eedi-half buffers */ + int j; + for( i = 0; i<3; i++ ) + { + for( j = 0; j < 4; j++ ) + { + uint8_t **p = &pv->eedi_half[j][i]; + if (*p) + { + free( *p - 3*pv->ref_stride[i] ); + *p = NULL; + } + } + } + + /* Cleanup eedi-full buffers */ + for( i = 0; i<3; i++ ) + { + for( j = 0; j < 5; j++ ) + { + uint8_t **p = &pv->eedi_full[j][i]; + if (*p) + { + free( *p - 3*pv->ref_stride[i] ); + *p = NULL; + } + } + } + } + + if( pv->post_processing > 1 && pv->mode == 5 ) + { + if (pv->cx2) eedi2_aligned_free(pv->cx2); + if (pv->cy2) eedi2_aligned_free(pv->cy2); + if (pv->cxy) eedi2_aligned_free(pv->cxy); + if (pv->tmpc) eedi2_aligned_free(pv->tmpc); + } + for( i = 0; i < pv->cpu_count; i++) { /* @@ -1408,6 +1791,30 @@ void hb_decomb_close( hb_filter_private_t * pv ) free( pv->decomb_complete_lock ); free( pv->decomb_arguments ); + if( pv->mode == 5 ) + 
{ + for( i = 0; i < 3; i++) + { + /* + * Tell each eedi2 thread to stop, and then cleanup. + */ + pv->eedi2_arguments[i].stop = 1; + hb_unlock( pv->eedi2_begin_lock[i] ); + + hb_thread_close( &pv->eedi2_threads[i] ); + hb_lock_close( &pv->eedi2_begin_lock[i] ); + hb_lock_close( &pv->eedi2_complete_lock[i] ); + } + + /* + * free memory for eedi2 structs + */ + free( pv->eedi2_threads ); + free( pv->eedi2_begin_lock ); + free( pv->eedi2_complete_lock ); + free( pv->eedi2_arguments ); + } + /* Cleanup mcdeint specific buffers */ if( pv->mcdeint_mode >= 0 ) { @@ -1456,6 +1863,8 @@ int hb_decomb_work( const hb_buffer_t * cbuf_in, tff = (pv->parity & 1) ^ 1; } + pv->tff = tff; + /* Store current frame in yadif cache */ store_ref( (const uint8_t**)pv->pic_in.data, pv ); diff --git a/libhb/eedi2.c b/libhb/eedi2.c new file mode 100644 index 000000000..2aa906ef0 --- /dev/null +++ b/libhb/eedi2.c @@ -0,0 +1,1870 @@ +/* $Id: eedi2.c,v 1.0 2009/03/06 5:00:00 jbrjake Exp $ + + This file is part of the HandBrake source code. + Homepage: <http://handbrake.fr/>. + It may be used under the terms of the GNU General Public License. + + The EEDI2 interpolator was created by tritical: + http://web.missouri.edu/~kes25c/ +*/ + +#include "hb.h" +#include "eedi2.h" + +/** + * EEDI2 directional limit lookup table + * + * These values are used to limit the range of edge direction searches and filtering. 
+ */ +const int eedi2_limlut[33] __attribute__ ((aligned (16))) = { + 6, 6, 7, 7, 8, 8, 9, 9, 9, 10, + 10, 11, 11, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, -1, -1 }; + +/** + * Analog of _aligned_malloc + * @param size Size of memory being pointed to + * @param align_size Size of memory chunks to align to (must be power of 2) + */ +void *eedi2_aligned_malloc( size_t size, size_t align_size ) +{ + char * ptr, * ptr2, * aligned_ptr; + int align_mask = align_size - 1; + + ptr = (char *)malloc( size + align_size + sizeof( int ) ); + if( ptr==NULL ) return( NULL ); + + ptr2 = ptr + sizeof( int ); + aligned_ptr = ptr2 + ( align_size - ( (size_t)ptr2 & align_mask ) ); + + + ptr2 = aligned_ptr - sizeof( int ); + *( (int *)ptr2 ) = (int)( aligned_ptr - ptr ); + + return( aligned_ptr ); +} + +/** + * Analog of _aligned_free + * @param ptr The aligned pointer, created with eedi2_aligned_malloc, to be freed + */ +void eedi2_aligned_free( void *ptr ) +{ + int * ptr2 = (int *)ptr - 1; + ptr -= * ptr2; + free(ptr); +} + +/** + * Sorts metrics for median filtering + * @param order Pointer to the table of values to sort + * @param length Length of the order array + */ +void eedi2_sort_metrics( int *order, const int length ) +{ + int i; + for( i = 1; i < length; ++i ) + { + int j = i; + const int temp = order[j]; + while( j > 0 && order[j-1] > temp ) + { + order[j] = order[j-1]; + --j; + } + order[j] = temp; + } +} + +/** + * Bitblits an image plane (overwrites one bitmap with another) + * @param dtsp Pointer to destination bitmap + * @param dst_pitch Stride of destination bitmap + * @param srcp Pointer to source bitmap + * @param src_pitch Stride of destination bitmap + * @param row_size Width of the bitmap being copied + * @param height Height of the source bitmap + * + * When row_size, dst_pitch, and src_pitch are equal, eedi2_bit_blit can work more quickly by copying the whole plane at once instead of individual lines. 
+ */ +void eedi2_bit_blit( uint8_t * dstp, int dst_pitch, + const uint8_t * srcp, int src_pitch, + int row_size, int height ) +{ + if( ( !height ) || ( !row_size ) ) + return; + + if( height == 1 || ( dst_pitch == src_pitch && src_pitch == row_size ) ) + { + memcpy( dstp, srcp, row_size * height ); + } + else + { + int y; + for( y = height; y > 0; --y ) + { + memcpy( dstp, srcp, row_size ); + dstp += dst_pitch; + srcp += src_pitch; + } + } +} + +/** + * A specialized variant of bit_blit, just for setting up the initial, field-sized bitmap planes that EEDI2 interpolates from. + * @param src Pointer to source bitmap plane being copied from + * @param dst Pointer to the destination bitmap plane being copied to + * @param pitch Stride of both bitmaps + * @param height Height of the original, full-size src plane being copied from + */ +void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height ) +{ + /* When TFF, we want to copy alternating + lines starting at 0, the top field. + When BFF, we want to start at line 1. */ + int y; + for( y = height; y > 0; y = y - 2 ) + { + memcpy( dst, src, pitch ); + dst += pitch; + src += pitch * 2; + } +} + +/** + * A specialized variant of bit_blit, just for resizing the field-height maps EEDI2 generates to frame-height...a simple line doubler + * @param srcp Pointer to source bitmap plane being copied from + * @param dstp Pointer to the destination bitmap plane being copied to + * @param height Height of the input, half-size src plane being copied from + * @param pitch Stride of both bitmaps + */ +void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch ) +{ + int y; + for( y = height; y > 0; y-- ) + { + memcpy( dstp, srcp, pitch ); + dstp += pitch; + memcpy( dstp, srcp, pitch ); + srcp += pitch; + dstp += pitch; + } +} + +/** + * Finds places where verticaly adjacent pixels abruptly change in intensity, i.e., sharp edges. 
+ * @param dstp Pointer to the destination bitmap + * @param dst_pitch Stride of dstp + * @param srcp Pointer to the source bitmap + * @param src_pitch Stride of srcp + * @param mtresh Magnitude threshold, ensures it doesn't mark edges on pixels that are too similar (10 is a good default value) + * @param vthresh Variance threshold, ensures it doesn't look for edges in highly random pixel blocks (20 is a good default value) + * @param lthresh Laplacian threshold, ensures edges are still prominent in the 2nd spatial derivative of the srcp plane (20 is a good default value) + * @param height Height of half-height single-field frame + * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch + */ +void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch, + int mthresh, int lthresh, int vthresh, int height, int width ) +{ + int x, y; + + mthresh = mthresh * 10; + vthresh = vthresh * 81; + + memset( dstp, 0, ( height / 2 ) * dst_pitch ); + + srcp += src_pitch; + dstp += dst_pitch; + unsigned char *srcpp = srcp-src_pitch; + unsigned char *srcpn = srcp+src_pitch; + for( y = 1; y < height - 1; ++y ) + { + for( x = 1; x < width-1; ++x ) + { + if( ( abs( srcpp[x] - srcp[x] ) < 10 && + abs( srcp[x] - srcpn[x] ) < 10 && + abs( srcpp[x] - srcpn[x] ) < 10 ) + || + ( abs( srcpp[x-1] - srcp[x-1] ) < 10 && + abs( srcp[x-1] - srcpn[x-1] ) < 10 && + abs( srcpp[x-1] - srcpn[x-1] ) < 10 && + abs( srcpp[x+1] - srcp[x+1] ) < 10 && + abs( srcp[x+1] - srcpn[x+1] ) < 10 && + abs( srcpp[x+1] - srcpn[x+1] ) < 10) ) + continue; + + const int sum = srcpp[x-1] + srcpp[x] + srcpp[x+1] + + srcp[x-1] + srcp[x]+ srcp[x+1] + + srcpn[x-1] + srcpn[x] + srcpn[x+1]; + + const int sumsq = srcpp[x-1] * srcpp[x-1] + + srcpp[x] * srcpp[x] + + srcpp[x+1] * srcpp[x+1] + + srcp[x-1] * srcp[x-1] + + srcp[x] * srcp[x] + + srcp[x+1] * srcp[x+1] + + srcpn[x-1] * srcpn[x-1] + + srcpn[x] * srcpn[x] + + srcpn[x+1] * srcpn[x+1]; + + if( 9 * sumsq-sum * sum < 
vthresh ) + continue; + + const int Ix = srcp[x+1] - srcp[x-1]; + const int Iy = MAX( MAX( abs( srcpp[x] - srcpn[x] ), + abs( srcpp[x] - srcp[x] ) ), + abs( srcp[x] - srcpn[x] ) ); + if( Ix * Ix + Iy * Iy >= mthresh ) + { + dstp[x] = 255; + continue; + } + + const int Ixx = srcp[x-1] - 2 * srcp[x] + srcp[x+1]; + const int Iyy = srcpp[x] - 2 * srcp[x] + srcpn[x]; + if( abs( Ixx ) + abs( Iyy ) >= lthresh ) + dstp[x] = 255; + } + dstp += dst_pitch; + srcpp += src_pitch; + srcp += src_pitch; + srcpn += src_pitch; + } +} + +/** + * Expands and smooths out the edge mask + * @param mskp Pointer to the source edge mask being read from + * @param msk_pitch Stride of mskp + * @param dstp Pointer to the destination to store the dilated edge mask + * @param dst_pitch Stride of dstp + * @param dstr Dilation threshold, ensures a pixel is only retained as an edge in dstp if this number of adjacent pixels or greater are also edges in mskp (4 is a good default value) + * @param height Height of half-height field-sized frame + * @param width Width of mskp bitmap rows, as opposed to the pdded stride in msk_pitch + */ +void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch, + int dstr, int height, int width ) +{ + int x, y; + + eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height ); + + mskp += msk_pitch; + unsigned char *mskpp = mskp - msk_pitch; + unsigned char *mskpn = mskp + msk_pitch; + dstp += dst_pitch; + for( y = 1; y < height - 1; ++y ) + { + for( x = 1; x < width - 1; ++x ) + { + if( mskp[x] != 0 ) + continue; + + int count = 0; + if( mskpp[x-1] == 0xFF ) ++count; + if( mskpp[x] == 0xFF ) ++count; + if( mskpp[x+1] == 0xFF ) ++count; + if( mskp[x-1] == 0xFF ) ++count; + if( mskp[x+1] == 0xFF ) ++count; + if( mskpn[x-1] == 0xFF ) ++count; + if( mskpn[x] == 0xFF ) ++count; + if( mskpn[x+1] == 0xFF ) ++count; + + if( count >= dstr ) + dstp[x] = 0xFF; + } + mskpp += msk_pitch; + mskp += msk_pitch; + mskpn += msk_pitch; + dstp += 
dst_pitch; + } +} + +/** + * Contracts the edge mask + * @param mskp Pointer to the source edge mask being read from + * @param msk_pitch Stride of mskp + * @param dstp Pointer to the destination to store the eroded edge mask + * @param dst_pitch Stride of dstp + * @param estr Erosion threshold, ensures a pixel isn't retained as an edge in dstp if fewer than this number of adjacent pixels are also edges in mskp (2 is a good default value) + * @param height Height of half-height field-sized frame + * @param width Width of mskp bitmap rows, as opposed to the pdded stride in msk_pitch + */ +void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch, + int estr, int height, int width ) +{ + int x, y; + + eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height ); + + mskp += msk_pitch; + unsigned char *mskpp = mskp - msk_pitch; + unsigned char *mskpn = mskp + msk_pitch; + dstp += dst_pitch; + for ( y = 1; y < height - 1; ++y ) + { + for ( x = 1; x < width - 1; ++x ) + { + if( mskp[x] != 0xFF ) continue; + + int count = 0; + if ( mskpp[x-1] == 0xFF ) ++count; + if ( mskpp[x] == 0xFF ) ++count; + if ( mskpp[x+1] == 0xFF ) ++count; + if ( mskp[x-1] == 0xFF ) ++count; + if ( mskp[x+1] == 0xFF ) ++count; + if ( mskpn[x-1] == 0xFF ) ++count; + if ( mskpn[x] == 0xFF ) ++count; + if ( mskpn[x+1] == 0xFF ) ++count; + + if ( count < estr) dstp[x] = 0; + } + mskpp += msk_pitch; + mskp += msk_pitch; + mskpn += msk_pitch; + dstp += dst_pitch; + } +} + +/** + * Smooths out horizontally aligned holes in the mask + * + * If none of the 6 horizontally adjacent pixels are edges, mark the current pixel as not edged. + * If at least 1 of the 3 on either side are edges, mark the current pixel as an edge. 
/**
 * Smooths out horizontally aligned holes in the mask
 *
 * Works on each interior row independently, looking three pixels to either
 * side of the current one:
 *  - a set pixel with none of those six neighbours set is cleared;
 *  - an unset pixel is set when there is a set pixel on its left and one on
 *    its right whose distances from it add up to at most 4.
 * Everything else is copied from mskp unchanged.
 *
 * @param mskp      Pointer to the source edge mask being read from
 * @param msk_pitch Stride of mskp
 * @param dstp      Pointer to the destination to store the smoothed edge mask
 * @param dst_pitch Stride of dstp
 * @param height    Number of rows to process
 * @param width     Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
 */
void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch,
                              int height, int width )
{
    int row, col;

    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );

    for( row = 1; row < height - 1; ++row )
    {
        const uint8_t *in  = mskp + row * msk_pitch;
        uint8_t       *out = dstp + row * dst_pitch;

        for( col = 3; col < width - 3; ++col )
        {
            /* Non-zero means "edge" here; all six taps are inside the row. */
            const int l1 = in[col-1] != 0, l2 = in[col-2] != 0, l3 = in[col-3] != 0;
            const int r1 = in[col+1] != 0, r2 = in[col+2] != 0, r3 = in[col+3] != 0;

            if( in[col] )
            {
                /* Isolated edge pixel: drop it. */
                if( !( l1 || l2 || l3 || r1 || r2 || r3 ) )
                    out[col] = 0;
            }
            else if( ( r1 && ( l1 || l2 || l3 ) ) ||
                     ( r2 && ( l1 || l2 ) ) ||
                     ( r3 && l1 ) )
            {
                /* Short hole between two edges: bridge it. */
                out[col] = 0xFF;
            }
        }
    }
}
/**
 * Calculates spatial direction vectors for the edges. This is EEDI2's timesink, and can be thought of as YADIF_CHECK on steroids, as both try to discern which angle a given edge follows
 *
 * For every interior pixel that is set in the edge mask and has a set left or
 * right neighbour, horizontal offsets u are tried and five differently
 * weighted sums of absolute differences between 3-pixel segments on
 * neighbouring rows are minimised. The best offsets found are combined into
 * one value that is written to dstp as 128 + 4 * offset. Qualifying pixels
 * with fewer than two usable candidates get 128; all other pixels keep the
 * 255 written by the initial memset.
 *
 * @param plane     The plane of the image being processed; the search distance is halved for any plane other than 0
 * @param mskp      Pointer to the source edge mask being read from
 * @param msk_pitch Stride of mskp
 * @param srcp      Pointer to the source image being filtered
 * @param src_pitch Stride of srcp
 * @param dstp      Pointer to the destination to store the edge direction map
 * @param dst_pitch Stride of dstp
 * @param maxd      Maximum pixel distance to search (24 is a good default value)
 * @param nt        Noise threshold (50 is a good default value)
 * @param height    Number of rows to process
 * @param width     Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
 */
void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
                            uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width )
{
    int x, y, u, i;

    // 255 marks "no direction"; it stays in place wherever the loops below skip a pixel
    memset( dstp, 255, dst_pitch * height );
    mskp += msk_pitch;
    dstp += dst_pitch;
    srcp += src_pitch;
    // Row pointers two above .. two below the current row
    unsigned char *src2p = srcp - src_pitch * 2;
    unsigned char *srcpp = srcp - src_pitch;
    unsigned char *srcpn = srcp + src_pitch;
    unsigned char *src2n = srcp + src_pitch * 2;
    unsigned char *mskpp = mskp - msk_pitch;
    unsigned char *mskpn = mskp + msk_pitch;
    const int maxdt = plane == 0 ? maxd : ( maxd >> 1 );

    for( y = 1; y < height - 1; ++y )
    {
        for( x = 1; x < width - 1; ++x )
        {
            // Only edge pixels with at least one horizontal edge neighbour are analysed
            if( mskp[x] != 0xFF || ( mskp[x-1] != 0xFF && mskp[x+1] != 0xFF ) )
                continue;
            // These bounds keep x+u inside [1, width-2].
            // NOTE(review): the mirrored x-u indices used below are not clamped by
            // these bounds and can fall outside the current row - confirm the
            // callers' buffers tolerate that.
            const int startu = MAX( -x + 1, -maxdt );
            const int stopu = MIN( width - 2 - x, maxdt );
            // Starting minima: a candidate must beat both a noise-based cap and
            // a multiple of the plain vertical difference to be accepted
            int minb = MIN( 13 * nt,
                            ( abs( srcp[x] - srcpn[x] ) +
                              abs( srcp[x] - srcpp[x] ) ) * 6 );
            int mina = MIN( 19 * nt,
                            ( abs( srcp[x] - srcpn[x] ) +
                              abs( srcp[x] - srcpp[x] ) ) * 9 );
            int minc = mina;
            int mind = minb;
            int mine = minb;
            // -5000 is the "no candidate found" sentinel for each of the five metrics
            int dira = -5000, dirb = -5000, dirc = -5000, dird = -5000, dire = -5000;
            for( u = startu; u <= stopu; ++u )
            {
                // The row above must have an edge near x+u (check skipped on the first processed row)
                if( y == 1 ||
                    mskpp[x-1+u] == 0xFF || mskpp[x+u] == 0xFF || mskpp[x+1+u] == 0xFF )
                {
                    // The row below must have an edge near x-u (check skipped on the last processed row)
                    if( y == height - 2 ||
                        mskpn[x-1-u] == 0xFF || mskpn[x-u] == 0xFF || mskpn[x+1-u] == 0xFF )
                    {
                        // Current row vs. next row shifted by -u
                        const int diffsn = abs( srcp[x-1] - srcpn[x-1-u] ) +
                                           abs( srcp[x] - srcpn[x-u] ) +
                                           abs( srcp[x+1] - srcpn[x+1-u] );

                        // Current row vs. previous row shifted by +u
                        const int diffsp = abs( srcp[x-1] - srcpp[x-1+u] ) +
                                           abs( srcp[x] - srcpp[x+u] ) +
                                           abs( srcp[x+1] - srcpp[x+1+u] );

                        // Previous row vs. current row shifted by -u
                        const int diffps = abs( srcpp[x-1] - srcp[x-1-u] ) +
                                           abs( srcpp[x] - srcp[x-u] ) +
                                           abs( srcpp[x+1] - srcp[x+1-u] );

                        // Next row vs. current row shifted by +u
                        const int diffns = abs( srcpn[x-1] - srcp[x-1+u] ) +
                                           abs( srcpn[x] - srcp[x+u] ) +
                                           abs( srcpn[x+1] - srcp[x+1+u] );

                        const int diff = diffsn + diffsp + diffps + diffns;
                        int diffd = diffsp + diffns;
                        int diffe = diffsn + diffps;
                        if( diff < minb )
                        {
                            dirb = u;
                            minb = diff;
                        }
                        // Metric a additionally uses the row two above, which only exists for y > 1
                        if( __builtin_expect( y > 1, 1) )
                        {
                            const int diff2pp = abs( src2p[x-1] - srcpp[x-1-u] ) +
                                                abs( src2p[x] - srcpp[x-u] ) +
                                                abs( src2p[x+1] - srcpp[x+1-u] );
                            const int diffp2p = abs( srcpp[x-1] - src2p[x-1+u] ) +
                                                abs( srcpp[x] - src2p[x+u] ) +
                                                abs( srcpp[x+1] - src2p[x+1+u] );
                            const int diffa = diff + diff2pp + diffp2p;
                            diffd += diffp2p;
                            diffe += diff2pp;
                            if( diffa < mina )
                            {
                                dira = u;
                                mina = diffa;
                            }
                        }
                        // Metric c additionally uses the row two below, which only exists for y < height-2
                        if( __builtin_expect( y < height-2, 1) )
                        {
                            const int diff2nn = abs( src2n[x-1] - srcpn[x-1+u] ) +
                                                abs( src2n[x] - srcpn[x+u] ) +
                                                abs( src2n[x+1] - srcpn[x+1+u] );
                            const int diffn2n = abs( srcpn[x-1] - src2n[x-1-u] ) +
                                                abs( srcpn[x] - src2n[x-u] ) +
                                                abs( srcpn[x+1] - src2n[x+1-u] );
                            const int diffc = diff + diff2nn + diffn2n;
                            diffd += diff2nn;
                            diffe += diffn2n;
                            if( diffc < minc )
                            {
                                dirc = u;
                                minc = diffc;
                            }
                        }
                        if( diffd < mind )
                        {
                            dird = u;
                            mind = diffd;
                        }
                        if( diffe < mine )
                        {
                            dire = u;
                            mine = diffe;
                        }
                    }
                }
            }
            // Collect the metrics that produced a candidate offset
            int order[5], k=0;
            if( dira != -5000 ) order[k++] = dira;
            if( dirb != -5000 ) order[k++] = dirb;
            if( dirc != -5000 ) order[k++] = dirc;
            if( dird != -5000 ) order[k++] = dird;
            if( dire != -5000 ) order[k++] = dire;
            if( k > 1 )
            {
                eedi2_sort_metrics( order, k );
                // Median of the candidates (rounded mean of the middle two when k is even)
                const int mid = ( k & 1 ) ?
                                order[k>>1] :
                                ( order[(k-1)>>1] + order[k>>1] + 1 ) >> 1;
                const int tlim = MAX( eedi2_limlut[abs(mid)] >> 2, 2 );
                int sum = 0, count = 0;
                // Average only the candidates that lie within tlim of the median
                for( i = 0; i < k; ++i )
                {
                    if( abs( order[i] - mid ) <= tlim )
                    {
                        ++count;
                        sum += order[i];
                    }
                }
                // The average is truncated to an int before being scaled by 4 and biased by 128
                if( count > 1 )
                    dstp[x] = 128 + ( (int)( (float)sum / (float)count ) * 4 );
                else
                    dstp[x] = 128;
            }
            else dstp[x] = 128;
        }
        mskpp += msk_pitch;
        mskp += msk_pitch;
        mskpn += msk_pitch;
        src2p += src_pitch;
        srcpp += src_pitch;
        srcp += src_pitch;
        srcpn += src_pitch;
        src2n += src_pitch;
        dstp += dst_pitch;
    }
}
dstp, int dst_pitch, int height, int width ) +{ + int x, y, j; + + eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height ); + + mskp += msk_pitch; + dmskp += dmsk_pitch; + dstp += dst_pitch; + unsigned char *dmskpp = dmskp - dmsk_pitch; + unsigned char *dmskpn = dmskp + dmsk_pitch; + + for( y = 1; y < height - 1; ++y ) + { + for( x = 1; x < width - 1; ++x ) + { + if( dmskp[x] == 0xFF || mskp[x] != 0xFF ) + continue; + const int dir = ( dmskp[x] - 128 ) >> 2; + const int lim = MAX( abs( dir ) * 2, 12 ); + int ict = 0, icb = 0; + if( dir < 0 ) + { + const int dirt = MAX( -x, dir ); + for( j = dirt; j <= 0; ++j ) + { + if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) || + ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) || + ( abs( dmskp[x+j] - dmskp[x] ) > lim && dmskp[x+j] != 0xFF ) ) + { + ict = 1; + break; + } + } + } + else + { + const int dirt = MIN( width - x - 1, dir ); + for( j = 0; j <= dirt; ++j ) + { + if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) || + ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) || + ( abs( dmskp[x+j] - dmskp[x] ) > lim && dmskp[x+j] != 0xFF ) ) + { + ict = 1; + break; + } + } + } + if( ict ) + { + if( dir < 0 ) + { + const int dirt = MIN( width - x - 1, abs( dir ) ); + for( j = 0; j <= dirt; ++j ) + { + if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) || + ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) || + ( abs( dmskp[x+j] - dmskp[x] ) > lim && dmskp[x+j] != 0xFF ) ) + { + icb = 1; + break; + } + } + } + else + { + const int dirt = MAX( -x, -dir ); + for( j = dirt; j <= 0; ++j ) + { + if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) || + ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) || + ( abs( dmskp[x+j] - dmskp[x] ) > lim && dmskp[x+j] != 0xFF ) ) + { + icb = 1; + break; + } + } + } + if( icb ) + dstp[x] = 255; + } + } + mskp += msk_pitch; + dmskpp += dmsk_pitch; + dmskp += dmsk_pitch; + dmskpn += dmsk_pitch; + dstp += dst_pitch; + } +} + + +/** + * 
Filters the edge direction mask + * @param mskp Pointer to the edge mask + * @param msk_pitch Stride of mskp + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the destination to store the filtered edge direction mask + * @param dst_pitch Stride of dstp + * @param height Height of half_height field-sized frame + * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch + */ +void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int height, int width ) +{ + int x, y, i; + + eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height ); + + dmskp += dmsk_pitch; + unsigned char *dmskpp = dmskp - dmsk_pitch; + unsigned char *dmskpn = dmskp + dmsk_pitch; + dstp += dst_pitch; + mskp += msk_pitch; + for( y = 1; y < height - 1; ++y ) + { + for( x = 1; x < width - 1; ++x ) + { + if( mskp[x] != 0xFF ) continue; + int u = 0, order[9]; + if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1]; + if( dmskpp[x] != 0xFF ) order[u++] = dmskpp[x]; + if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1]; + if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1]; + if( dmskp[x] != 0xFF ) order[u++] = dmskp[x]; + if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1]; + if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1]; + if( dmskpn[x] != 0xFF ) order[u++] = dmskpn[x]; + if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1]; + if( u < 4 ) + { + dstp[x] = 255; + continue; + } + eedi2_sort_metrics( order, u ); + const int mid = ( u & 1 ) ? 
+ order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1; + int sum = 0, count = 0; + const int lim = eedi2_limlut[abs(mid-128)>>2]; + for( i = 0; i < u; ++i ) + { + if( abs( order[i] - mid ) <= lim ) + { + ++count; + sum += order[i]; + } + } + if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) ) + { + dstp[x] = 255; + continue; + } + dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f ); + } + dmskpp += dmsk_pitch; + dmskp += dmsk_pitch; + dmskpn += dmsk_pitch; + dstp += dst_pitch; + mskp += msk_pitch; + } +} + +/** + * Smoothes out the edge direction map + * @param mskp Pointer to the edge mask + * @param msk_pitch Stride of mskp + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the destination to store the expanded edge direction mask + * @param dst_pitch Stride of dstp + * @param height Height of half-height field-sized frame + * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch + */ +void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int height, int width ) +{ + int x, y, i; + + eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height ); + + dmskp += dmsk_pitch; + unsigned char *dmskpp = dmskp - dmsk_pitch; + unsigned char *dmskpn = dmskp + dmsk_pitch; + dstp += dst_pitch; + mskp += msk_pitch; + for( y = 1; y < height - 1; ++y ) + { + for( x = 1; x < width - 1; ++x ) + { + if( dmskp[x] != 0xFF || mskp[x] != 0xFF ) continue; + int u = 0, order[9]; + if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1]; + if( dmskpp[x] != 0xFF ) order[u++] = dmskpp[x]; + if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1]; + if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1]; + if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1]; + if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1]; + if( dmskpn[x] != 0xFF ) order[u++] = dmskpn[x]; + if( dmskpn[x+1] != 0xFF ) 
order[u++] = dmskpn[x+1]; + if( u < 5 ) continue; + eedi2_sort_metrics( order, u ); + const int mid = ( u & 1 ) ? + order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1; + int sum = 0, count = 0; + const int lim = eedi2_limlut[abs(mid-128)>>2]; + for( i = 0; i < u; ++i ) + { + if( abs( order[i] - mid ) <= lim ) + { + ++count; + sum += order[i]; + } + } + if( count < 5 ) continue; + dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f ); + } + dmskpp += dmsk_pitch; + dmskp += dmsk_pitch; + dmskpn += dmsk_pitch; + dstp += dst_pitch; + mskp += msk_pitch; + } +} + +/** + * Re-draws a clearer, less blocky frame-height edge direction mask + * @param mskp Pointer to the edge mask + * @param msk_pitch Stride of mskp + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the destination to store the redrawn direction mask + * @param dst_pitch Stride of dstp + * @param tff Whether or not the frame parity is Top Field First + * @param height Height of the full-frame output + * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch + */ +void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int tff, int height, int width ) +{ + int x, y, i; + memset( dstp, 255, dst_pitch * height ); + dstp += dst_pitch * ( 2 - tff ); + dmskp += dmsk_pitch * ( 1 - tff ); + mskp += msk_pitch * ( 1 - tff ); + unsigned char *dmskpn = dmskp + dmsk_pitch * 2; + unsigned char *mskpn = mskp + msk_pitch * 2; + for( y = 2 - tff; y < height - 1; y += 2 ) + { + for( x = 1; x < width - 1; ++x ) + { + if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue; + int v = 0, order[6]; + if( dmskp[x-1] != 0xFF ) order[v++] = dmskp[x-1]; + if( dmskp[x] != 0xFF ) order[v++] = dmskp[x]; + if( dmskp[x+1] != 0xFF ) order[v++] = dmskp[x+1]; + if( dmskpn[x-1] != 0xFF ) order[v++] = dmskpn[x-1]; + if( dmskpn[x] != 0xFF ) 
order[v++] = dmskpn[x]; + if( dmskpn[x+1] != 0xFF ) order[v++] = dmskpn[x+1]; + if( v < 3 ) continue; + else + { + eedi2_sort_metrics( order, v ); + const int mid = ( v & 1 ) ? order[v>>1] : ( order[(v-1)>>1] + order[v>>1]+1) >> 1; + const int lim = eedi2_limlut[abs(mid-128)>>2]; + int u = 0; + if( abs( dmskp[x-1] - dmskpn[x-1] ) <= lim || + dmskp[x-1] == 0xFF || dmskpn[x-1] == 0xFF ) + ++u; + if( abs( dmskp[x] - dmskpn[x] ) <= lim || + dmskp[x] == 0xFF || dmskpn[x] == 0xFF ) + ++u; + if( abs( dmskp[x+1] - dmskpn[x-1] ) <= lim || + dmskp[x+1] == 0xFF || dmskpn[x+1] == 0xFF) + ++u; + if( u < 2 ) continue; + int count = 0, sum = 0; + for( i = 0; i < v; ++i ) + { + if( abs( order[i] - mid ) <= lim ) + { + ++count; + sum += order[i]; + } + } + if( count < v - 2 || count < 2 ) continue; + dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f ); + } + } + mskp += msk_pitch * 2; + mskpn += msk_pitch * 2; + dstp += dst_pitch * 2; + dmskp += dmsk_pitch * 2; + dmskpn += dmsk_pitch * 2; + } +} + +/** + * Filters the frane-height edge direction mask + * @param mskp Pointer to the edge mask + * @param msk_pitch Stride of mskp + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the destination to store the filtered direction mask + * @param dst_pitch Stride of dstp + * @param field Field to filter + * @param height Height of the full-frame output + * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch + */ +void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int field, int height, int width ) +{ + int x, y, i; + eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height ); + dmskp += dmsk_pitch * ( 2 - field ); + unsigned char *dmskpp = dmskp - dmsk_pitch * 2; + unsigned char *dmskpn = dmskp + dmsk_pitch * 2; + mskp += msk_pitch * ( 1 - field ); + unsigned char *mskpn 
= mskp + msk_pitch * 2; + dstp += dst_pitch * ( 2 - field ); + for( y = 2 - field; y < height - 1; y += 2 ) + { + for( x = 1; x < width - 1; ++x ) + { + if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue; + int u = 0, order[9]; + if( y > 1 ) + { + if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1]; + if( dmskpp[x] != 0xFF ) order[u++] = dmskpp[x]; + if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1]; + } + if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1]; + if( dmskp[x] != 0xFF ) order[u++] = dmskp[x]; + if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1]; + if( y < height - 2 ) + { + if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1]; + if( dmskpn[x] != 0xFF ) order[u++] = dmskpn[x]; + if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1]; + } + if( u < 4 ) + { + dstp[x] = 255; + continue; + } + eedi2_sort_metrics( order, u ); + const int mid = ( u & 1 ) ? order[u>>1] : (order[(u-1)>>1] + order[u>>1] + 1 ) >> 1; + int sum = 0, count = 0; + const int lim = eedi2_limlut[abs(mid-128)>>2]; + for( i = 0; i < u; ++i ) + { + if( abs( order[i] - mid ) <= lim ) + { + ++count; + sum += order[i]; + } + } + if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) ) + { + dstp[x] = 255; + continue; + } + dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f ); + } + mskp += msk_pitch * 2; + mskpn += msk_pitch * 2; + dmskpp += dmsk_pitch * 2; + dmskp += dmsk_pitch * 2; + dmskpn += dmsk_pitch * 2; + dstp += dst_pitch * 2; + } +} + +/** + * Smoothes out the frame-height edge direction mask + * @param mskp Pointer to the edge mask + * @param msk_pitch Stride of mskp + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the destination to store the expanded direction mask + * @param dst_pitch Stride of dstp + * @param field Field to filter + * @param height Height of the full-frame output + * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch + */ +void 
eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int field, int height, int width ) +{ + int x, y, i; + + eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height ); + + dmskp += dmsk_pitch * ( 2 - field ); + unsigned char *dmskpp = dmskp - dmsk_pitch * 2; + unsigned char *dmskpn = dmskp + dmsk_pitch * 2; + mskp += msk_pitch * ( 1 - field ); + unsigned char *mskpn = mskp + msk_pitch * 2; + dstp += dst_pitch * ( 2 - field ); + for( y = 2 - field; y < height - 1; y += 2) + { + for( x = 1; x < width - 1; ++x ) + { + if( dmskp[x] != 0xFF || ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue; + int u = 0, order[9]; + if( y > 1 ) + { + if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1]; + if( dmskpp[x] != 0xFF ) order[u++] = dmskpp[x]; + if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1]; + } + if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1]; + if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1]; + if( y < height - 2 ) + { + if( dmskpn[x-1] != 0xFF) order[u++] = dmskpn[x-1]; + if( dmskpn[x] != 0xFF) order[u++] = dmskpn[x]; + if( dmskpn[x+1] != 0xFF) order[u++] = dmskpn[x+1]; + } + if( u < 5 ) continue; + eedi2_sort_metrics( order, u ); + const int mid = ( u & 1 ) ? 
order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1; + int sum = 0, count = 0; + const int lim = eedi2_limlut[abs(mid-128)>>2]; + for( i = 0; i < u; ++i ) + { + if( abs( order[i] - mid ) <= lim ) + { + ++count; + sum += order[i]; + } + } + if( count < 5 ) continue; + dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f ); + } + mskp += msk_pitch * 2; + mskpn += msk_pitch * 2; + dmskpp += dmsk_pitch * 2; + dmskp += dmsk_pitch * 2; + dmskpn += dmsk_pitch * 2; + dstp += dst_pitch * 2; + } +} + +/** + * Like the name suggests, this function fills in gaps in the frame-height edge direction mask + * @param mskp Pointer to the edge mask + * @param msk_pitch Stride of mskp + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the destination to store the filled-in direction mask + * @param dst_pitch Stride of dstp + * @param field Field to filter + * @param height Height of the full-frame output + * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch + */ +void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int field, int height, int width ) +{ + int x, y, j; + + eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height ); + + dmskp += dmsk_pitch * ( 2 - field ); + unsigned char *dmskpp = dmskp - dmsk_pitch * 2; + unsigned char *dmskpn = dmskp + dmsk_pitch * 2; + mskp += msk_pitch * ( 1 - field ); + unsigned char *mskpp = mskp - msk_pitch * 2; + unsigned char *mskpn = mskp + msk_pitch * 2; + unsigned char *mskpnn = mskpn + msk_pitch * 2; + dstp += dst_pitch * ( 2 - field ); + for( y = 2 - field; y < height - 1; y += 2 ) + { + for( x = 1; x < width - 1; ++x ) + { + if( dmskp[x] != 0xFF || + ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue; + int u = x - 1, back = 500, forward = -500; + while( u ) + { + if( dmskp[u] != 0xFF ) + { + back = dmskp[u]; + break; + } + 
if( mskp[u] != 0xFF && mskpn[u] != 0xFF ) break; + --u; + } + int v = x + 1; + while( v < width ) + { + if( dmskp[v] != 0xFF ) + { + forward = dmskp[v]; + break; + } + if( mskp[v] != 0xFF && mskpn[v] != 0xFF ) break; + ++v; + } + int tc = 1, bc = 1; + int mint = 500, maxt = -20; + int minb = 500, maxb = -20; + for( j = u; j <= v; ++j ) + { + if( tc ) + { + if( y <= 2 || dmskpp[j] == 0xFF || ( mskpp[j] != 0xFF && mskp[j] != 0xFF ) ) + { + tc = 0; + mint = maxt = 20; + } + else + { + if( dmskpp[j] < mint ) mint = dmskpp[j]; + if( dmskpp[j] > maxt ) maxt = dmskpp[j]; + } + } + if( bc ) + { + if( y >= height - 3 || dmskpn[j] == 0xFF || ( mskpn[j] != 0xFF && mskpnn[j] != 0xFF ) ) + { + bc = 0; + minb = maxb = 20; + } + else + { + if( dmskpn[j] < minb ) minb = dmskpn[j]; + if( dmskpn[j] > maxb ) maxb = dmskpn[j]; + } + } + } + if( maxt == -20 ) maxt = mint = 20; + if( maxb == -20 ) maxb = minb = 20; + int thresh = MAX( + MAX( MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2, 8 ), + MAX( abs( mint - maxt ), abs( minb - maxb ) ) ); + const int flim = MIN( + MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2, + 6 ); + if( abs( forward - back ) <= thresh && ( v - u - 1 <= flim || tc || bc ) ) + { + double step = (double)( forward - back ) / (double)( v - u ); + for( j = 0; j < v - u - 1; ++j ) + dstp[u+j+1] = back + (int)( j * step + 0.5 ); + } + } + mskpp += msk_pitch * 2; + mskp += msk_pitch * 2; + mskpn += msk_pitch * 2; + mskpnn += msk_pitch * 2; + dmskpp += dmsk_pitch * 2; + dmskp += dmsk_pitch * 2; + dmskpn += dmsk_pitch * 2; + dstp += dst_pitch * 2; + } +} + +/** + * Actually renders the output frame, based on the edge and edge direction masks + * @param plane The plane of the image being processed, to know to reduce a search distance for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height) + * @param dmskp Pointer to the edge direction mask being read from + * @param dmsk_pitch Stride of dmskp + * @param dstp Pointer to the 
line-doubled source field used being filtered in place + * @param dst_pitch Stride of dstp + * @param omskp Pointer to the destination to store the output edge mask used for post-processing + * @param osmk_pitch Stride of omskp + * @param field Field to filter + * @nt Noise threshold, (50 is a good default value) + * @param height Height of the full-frame output + * @param width Width of dstp bitmap rows, as opposed to the pdded stride in dst_pitch + */ +void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt, + int height, int width ) +{ + int x, y, u; + + if( field == 1 ) + { + eedi2_bit_blit( dstp + ( height - 1 ) * dst_pitch, + dst_pitch, + dstp + ( height - 2 ) * dst_pitch, + dst_pitch, + width, + 1 ); + } + else + { + eedi2_bit_blit( dstp, + dst_pitch, + dstp + dst_pitch, + dst_pitch, + width, + 1 ); + } + + dstp += dst_pitch * ( 1 - field ); + omskp += omsk_pitch * ( 1 - field ); + unsigned char *dstpn = dstp + dst_pitch; + unsigned char *dstpnn = dstp + dst_pitch * 2; + unsigned char *omskn = omskp + omsk_pitch * 2; + dmskp += dmsk_pitch * ( 2 - field ); + for( y = 2 - field; y < height - 1; y += 2 ) + { + for( x = 0; x < width; ++x ) + { + int dir = dmskp[x]; + const int lim = eedi2_limlut[abs(dir-128)>>2]; + if( dir == 255 || + ( abs( dmskp[x] - dmskp[x-1] ) > lim && + abs( dmskp[x] - dmskp[x+1] ) > lim ) ) + { + dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1; + if( dir != 255 ) dmskp[x] = 128; + continue; + } + if( lim < 9 ) + { + const int sum = dstp[x-1] + dstp[x] + dstp[x+1] + + dstpnn[x-1] + dstpnn[x] + dstpnn[x+1]; + const int sumsq = dstp[x-1] * dstp[x-1] + + dstp[x] * dstp[x] + + dstp[x+1] * dstp[x+1] + + dstpnn[x-1] * dstpnn[x-1] + + dstpnn[x] * dstpnn[x] + + dstpnn[x+1] * dstpnn[x+1]; + if( 6 * sumsq - sum * sum < 576 ) + { + dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1; + dmskp[x] = 255; + continue; + } + } + if( x > 1 && x < width - 2 && + 
( dstp[x] < MAX( dstp[x-2], dstp[x-1] ) - 3 && + dstp[x] < MAX( dstp[x+2], dstp[x+1] ) - 3 && + dstpnn[x] < MAX( dstpnn[x-2], dstpnn[x-1] ) - 3 && + dstpnn[x] < MAX( dstpnn[x+2], dstpnn[x+1] ) - 3 ) + || + ( dstp[x] > MIN( dstp[x-2], dstp[x-1] ) + 3 && + dstp[x] > MIN( dstp[x+2], dstp[x+1] ) + 3 && + dstpnn[x] > MIN( dstpnn[x-2], dstpnn[x-1] ) + 3 && + dstpnn[x] > MIN( dstpnn[x+2], dstpnn[x+1] ) + 3 ) ) + { + dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1; + dmskp[x] = 128; + continue; + } + dir = ( dir - 128 + 2 ) >> 2; + int val = ( dstp[x] + dstpnn[x] + 1 ) >> 1; + const int startu = ( dir - 2 < 0 ) ? + MAX( -x + 1, MAX( dir - 2, -width + 2 + x ) ) + : + MIN( x - 1, MIN( dir - 2, width - 2 - x ) ); + const int stopu = ( dir + 2 < 0 ) ? + MAX( -x + 1, MAX( dir + 2, -width + 2 + x ) ) + : + MIN( x - 1, MIN( dir + 2, width - 2 - x ) ); + int min = 8 * nt; + for( u = startu; u <= stopu; ++u ) + { + const int diff = + abs( dstp[x-1] - dstpnn[x-u-1] ) + + abs( dstp[x] - dstpnn[x-u] ) + + abs( dstp[x+1] - dstpnn[x-u+1] ) + + abs( dstpnn[x-1] - dstp[x+u-1] ) + + abs( dstpnn[x] - dstp[x+u] ) + + abs( dstpnn[x+1] - dstp[x+u+1] ); + if( diff < min && + ( ( omskp[x-1+u] != 0xFF && abs( omskp[x-1+u] - dmskp[x] ) <= lim ) || + ( omskp[x+u] != 0xFF && abs( omskp[x+u] - dmskp[x]) <= lim ) || + ( omskp[x+1+u] != 0xFF && abs( omskp[x+1+u] - dmskp[x]) <= lim ) ) && + ( ( omskn[x-1-u] != 0xFF && abs( omskn[x-1-u] - dmskp[x]) <= lim ) || + ( omskn[x-u] != 0xFF && abs( omskn[x-u] - dmskp[x]) <= lim ) || + ( omskn[x+1-u] != 0xFF && abs( omskn[x+1-u] - dmskp[x]) <= lim ) ) ) + { + const int diff2 = + abs( dstp[x+(u>>1)-1] - dstpnn[x-(u>>1)-1] ) + + abs( dstp[x+(u>>1)] - dstpnn[x-(u>>1)] ) + + abs( dstp[x+(u>>1)+1] - dstpnn[x-(u>>1)+1] ); + if( diff2 < 4 * nt && + ( ( ( abs( omskp[x+(u>>1)] - omskn[x-(u>>1)] ) <= lim || + abs( omskp[x+(u>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) && + omskp[x+(u>>1)] != 0xFF ) + || + ( ( abs( omskp[x+((u+1)>>1)] - omskn[x-(u>>1)] ) <= lim || + abs( 
omskp[x+((u+1)>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) && + omskp[x+((u+1)>>1)] != 0xFF ) ) ) + { + if( ( abs( dmskp[x] - omskp[x+(u>>1)] ) <= lim || + abs( dmskp[x] - omskp[x+((u+1)>>1)] ) <= lim ) && + ( abs( dmskp[x] - omskn[x-(u>>1)] ) <= lim || + abs( dmskp[x] - omskn[x-((u+1)>>1)] ) <= lim ) ) + { + val = ( dstp[x+(u>>1)] + dstp[x+((u+1)>>1)] + + dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)] + 2 ) >> 2; + min = diff; + dir = u; + } + } + } + } + if( min != 8 * nt ) + { + dstpn[x] = val; + dmskp[x] = 128 + dir * 4; + } + else + { + const int minm = MIN( dstp[x], dstpnn[x] ); + const int maxm = MAX( dstp[x], dstpnn[x] ); + const int d = plane == 0 ? 4 : 2; + const int startu = MAX( -x + 1, -d ); + const int stopu = MIN( width - 2 - x, d ); + min = 7 * nt; + for( u = startu; u <= stopu; ++u ) + { + const int p1 = dstp[x+(u>>1)] + dstp[x+((u+1)>>1)]; + const int p2 = dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)]; + const int diff = + abs( dstp[x-1] - dstpnn[x-u-1] ) + + abs( dstp[x] - dstpnn[x-u] ) + + abs( dstp[x+1] - dstpnn[x-u+1] ) + + abs( dstpnn[x-1] - dstp[x+u-1] ) + + abs( dstpnn[x] - dstp[x+u] ) + + abs( dstpnn[x+1] - dstp[x+u+1] ) + + abs( p1 - p2 ); + if( diff < min ) + { + const int valt = ( p1 + p2 + 2 ) >> 2; + if( valt >= minm && valt <= maxm ) + { + val = valt; + min = diff; + dir = u; + } + } + } + dstpn[x] = val; + if( min == 7*nt ) dmskp[x] = 128; + else dmskp[x] = 128 + dir * 4; + } + } + dstp += dst_pitch * 2; + dstpn += dst_pitch * 2; + dstpnn += dst_pitch * 2; + dmskp += dmsk_pitch * 2; + omskp += omsk_pitch * 2; + omskn += omsk_pitch * 2; + } +} + +/** + * Applies some extra filtering to smooth the edge direction mask + * @param nmskp Pointer to the newly-filtered edge direction mask being read from + * @param nmsk_pitch Stride of nmskp + * @param omskp Pointer to the old unfiltered edge direction mask being read from + * @param omsk_pitch Stride of osmkp + * @param dstp Pointer to the output image being filtered in place + * @param src_pitch Stride of 
/*
 * Resolves the two tap positions pos-k and pos+k on a line of len samples.
 * A tap that falls off one end of the line is replaced by its partner on the
 * other side, so the partner is simply counted twice. If neither tap exists
 * (only possible when len < 6) both collapse onto pos itself.
 */
static void eedi2_blur1_taps( int pos, int k, int len, int *lo, int *hi )
{
    int before = pos - k;
    int after  = pos + k;

    if( before < 0 && after >= len )
        before = after = pos;
    else if( before < 0 )
        before = after;
    else if( after >= len )
        after = before;

    *lo = before;
    *hi = after;
}

/* Applies the 7-tap kernel at position pos of a contiguous line, using the border substitution above. */
static uint8_t eedi2_blur1_edge_sample( const uint8_t *line, int pos, int len )
{
    static const int weight[4] = { 26152, 15862, 3539, 291 };
    int acc = line[pos] * weight[0] + 32768;
    int k;

    for( k = 1; k <= 3; ++k )
    {
        int lo, hi;
        eedi2_blur1_taps( pos, k, len, &lo, &hi );
        acc += ( line[lo] + line[hi] ) * weight[k];
    }
    return (uint8_t)( acc >> 16 );
}

/**
 * Blurs the source field plane
 *
 * Separable 7-tap blur with the 16.16 fixed-point kernel
 * { 291, 3539, 15862, 26152, 15862, 3539, 291 } (the weights sum to 65536).
 * A horizontal pass writes src into tmp, then a vertical pass writes tmp into
 * dst. Near a border, a tap that falls outside the image is replaced by the
 * tap at the same distance on the opposite side of the pixel.
 *
 * tmp must not alias src or dst: each pass reads neighbouring samples of its
 * input while writing its output.
 *
 * @param src       Pointer to the half-height source field plane
 * @param src_pitch Stride of src
 * @param tmp       Pointer to a temporary buffer that receives the horizontally blurred plane
 * @param tmp_pitch Stride of tmp
 * @param dst       Pointer to the destination to store the blurred field plane
 * @param dst_pitch Stride of dst
 * @param height    Height of the half-height field-sized frame
 * @param width     Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
 */
void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst, int dst_pitch, int height, int width )
{
    int x, y, k;

    /* Columns [0, head) and [tail, width) need border handling. */
    const int head = width < 3 ? width : 3;
    const int tail = width - 3 > 3 ? width - 3 : 3;

    /* Horizontal pass: src -> tmp */
    for( y = 0; y < height; ++y )
    {
        const uint8_t *srcp = src + y * src_pitch;
        uint8_t       *dstp = tmp + y * tmp_pitch;

        for( x = 0; x < head; ++x )
            dstp[x] = eedi2_blur1_edge_sample( srcp, x, width );

        for( x = 3; x < width - 3; ++x )
        {
            dstp[x] = ( ( srcp[x-3] + srcp[x+3] ) * 291 +
                        ( srcp[x-2] + srcp[x+2] ) * 3539 +
                        ( srcp[x-1] + srcp[x+1] ) * 15862 +
                        srcp[x] * 26152 + 32768 ) >> 16;
        }

        for( x = tail; x < width; ++x )
            dstp[x] = eedi2_blur1_edge_sample( srcp, x, width );
    }

    /*
     * Vertical pass: tmp -> dst. Every row pointer is derived from tmp and
     * tmp_pitch only; the source stride plays no part in this pass.
     */
    for( y = 0; y < height; ++y )
    {
        const uint8_t *centre = tmp + y * tmp_pitch;
        const uint8_t *above[3];
        const uint8_t *below[3];
        uint8_t       *out = dst + y * dst_pitch;

        for( k = 1; k <= 3; ++k )
        {
            int lo, hi;
            eedi2_blur1_taps( y, k, height, &lo, &hi );
            above[k-1] = tmp + lo * tmp_pitch;
            below[k-1] = tmp + hi * tmp_pitch;
        }

        for( x = 0; x < width; ++x )
        {
            out[x] = ( ( above[2][x] + below[2][x] ) * 291 +
                       ( above[1][x] + below[1][x] ) * 3539 +
                       ( above[0][x] + below[0][x] ) * 15862 +
                       centre[x] * 26152 + 32768 ) >> 16;
        }
    }
}
srcp = src; + int * dstp = tmp; + int x, y; + + for( y = 0; y < height; ++y ) + { + x = 0; + dstp[x] = ( srcp[x+4] * 678 + srcp[x+3] * 3902 + srcp[x+2] * 13618 + + srcp[x+1] * 28830 + srcp[x] * 18508 + 32768 ) >> 16; + ++x; + dstp[x] = ( srcp[x+4] * 678 + srcp[x+3] * 3902 + srcp[x+2] * 13618 + + ( srcp[x-1] + srcp[x+1] ) *14415 + + srcp[x] * 18508 + 32768 ) >> 16; + ++x; + dstp[x] = ( srcp[x+4] * 678 + srcp[x+3] * 3902 + + ( srcp[x-2] + srcp[x+2] ) * 6809 + + ( srcp[x-1] + srcp[x+1] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 16; + ++x; + dstp[x] = ( srcp[x+4] * 678 + ( srcp[x-3] + srcp[x+3] ) * 1951 + + ( srcp[x-2] + srcp[x+2] ) * 6809 + + ( srcp[x-1] + srcp[x+1] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 16; + + for( x = 4; x < width - 4; ++x ) + { + dstp[x] = ( ( srcp[x-4] + srcp[x+4] ) * 339 + + ( srcp[x-3] + srcp[x+3] ) * 1951 + + ( srcp[x-2] + srcp[x+2] ) * 6809 + + ( srcp[x-1] + srcp[x+1] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 16; + } + + dstp[x] = ( srcp[x-4] * 678 + ( srcp[x-3] + srcp[x+3] ) * 1951 + + ( srcp[x-2] + srcp[x+2] ) * 6809 + + ( srcp[x-1] + srcp[x+1] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 16; + ++x; + dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + + ( srcp[x-2] + srcp[x+2] ) * 6809 + + ( srcp[x-1] + srcp[x+1] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 16; + ++x; + dstp[x] = ( srcp[x-4] * 678 + srcp[x+3] * 3902 + srcp[x-2] * 13618 + + ( srcp[x-1] + srcp[x+1] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 16; + ++x; + dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + srcp[x-2] * 13618 + + srcp[x-1] * 28830 + + srcp[x] * 18508 + 32768 ) >> 16; + srcp += pitch; + dstp += pitch; + } + dstp = dst; + srcp = tmp; + int * src4p = srcp - pitch * 4; + int * src3p = srcp - pitch * 3; + int * src2p = srcp - pitch * 2; + int * srcpp = srcp - pitch; + int * srcpn = srcp + pitch; + int * src2n = srcp + pitch * 2; + int * src3n = srcp + pitch * 3; + int * src4n = srcp + pitch * 4; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4n[x] * 678 + src3n[x] * 
3902 + + src2n[x] * 13618 + srcpn[x] * 28830 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + src2n[x] * 13618 + + ( srcpp[x] + srcpn[x] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + + ( src2p[x] + src2n[x] ) * 6809 + + ( srcpp[x] + srcpn[x] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4n[x] * 678 + ( src3p[x] + src3n[x] ) * 1951 + + ( src2p[x] + src2n[x] ) * 6809 + + ( srcpp[x] + srcpn[x] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( y = 4; y < height - 4; ++y ) + { + for( x = 0; x < width; ++x ) + { + dstp[x] = ( ( src4p[x] + src4n[x] ) * 339 + + ( src3p[x] + src3n[x] ) * 1951 + + ( src2p[x] + src2n[x] ) * 6809 + + ( srcpp[x] + srcpn[x] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + } + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4p[x] * 678 + + ( src3p[x] + src3n[x] ) * 1951 + + ( src2p[x] + src2n[x] ) * 6809 + + ( srcpp[x] + srcpn[x] ) * 14415 + 
+ srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 + + ( src2p[x] + src2n[x] ) * 6809 + + ( srcpp[x] + srcpn[x] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 + src2p[x] * 13618 + + ( srcpp[x] + srcpn[x] ) * 14415 + + srcp[x] * 18508 + 32768 ) >> 18; + } + src4p += pitch; + src3p += pitch; + src2p += pitch; + srcpp += pitch; + srcp += pitch; + srcpn += pitch; + src2n += pitch; + src3n += pitch; + src4n += pitch; + dstp += pitch; + for( x = 0; x < width; ++x ) + { + dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 + + src2p[x] * 13618 + srcpp[x] * 28830 + + srcp[x] * 18508 + 32768 ) >> 18; + } +} + +/** + * Finds spatial derivatives for a a source field plane + * @param srcp Pointer to the plane to derive + * @param src_pitch Stride of srcp + * @param height Height of the half-height field-sized frame + * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch + * @param x2 Pointed to the array to store the x/x derivatives + * @param y2 Pointer to the array to store the y/y derivatives + * @param xy Pointer to the array to store the x/y derivatives + */ +void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width, int *x2, int *y2, int *xy) +{ + + unsigned char * srcpp = srcp - src_pitch; + unsigned char * srcpn = srcp + src_pitch; + int x, y; + { + const int Ix = srcp[1] - srcp[0]; + const int Iy = srcp[0] - srcpn[0]; + x2[0] = ( Ix * Ix ) >> 1; + y2[0] = ( Iy * Iy ) >> 1; + xy[0] = ( Ix * Iy ) >> 1; + } + for( x = 1; 
x < width - 1; ++x ) + { + const int Ix = srcp[x+1] - srcp[x-1]; + const int Iy = srcp[x] - srcpn[x]; + x2[x] = ( Ix * Ix ) >> 1; + y2[x] = ( Iy * Iy ) >> 1; + xy[x] = ( Ix * Iy ) >> 1; + } + { + const int Ix = srcp[x] - srcp[x-1]; + const int Iy = srcp[x] - srcpn[x]; + x2[x] = ( Ix * Ix ) >> 1; + y2[x] = ( Iy * Iy ) >> 1; + xy[x] = ( Ix * Iy ) >> 1; + } + srcpp += src_pitch; + srcp += src_pitch; + srcpn += src_pitch; + x2 += src_pitch; + y2 += src_pitch; + xy += src_pitch; + for( y = 1; y < height - 1; ++y ) + { + { + const int Ix = srcp[1] - srcp[0]; + const int Iy = srcpp[0] - srcpn[0]; + x2[0] = ( Ix * Ix ) >> 1; + y2[0] = ( Iy * Iy ) >> 1; + xy[0] = ( Ix * Iy ) >> 1; + } + for ( x = 1; x < width - 1; ++x ) + { + const int Ix = srcp[x+1] - srcp[x-1]; + const int Iy = srcpp[x] - srcpn[x]; + x2[x] = ( Ix * Ix ) >> 1; + y2[x] = ( Iy * Iy ) >> 1; + xy[x] = ( Ix * Iy ) >> 1; + } + { + const int Ix = srcp[x] - srcp[x-1]; + const int Iy = srcpp[x] - srcpn[x]; + x2[x] = ( Ix *Ix ) >> 1; + y2[x] = ( Iy *Iy ) >> 1; + xy[x] = ( Ix *Iy ) >> 1; + } + srcpp += src_pitch; + srcp += src_pitch; + srcpn += src_pitch; + x2 += src_pitch; + y2 += src_pitch; + xy += src_pitch; + } + { + const int Ix = srcp[1] - srcp[0]; + const int Iy = srcpp[0] - srcp[0]; + x2[0] = ( Ix * Ix ) >> 1; + y2[0] = ( Iy * Iy ) >> 1; + xy[0] = ( Ix * Iy ) >> 1; + } + for( x = 1; x < width - 1; ++x ) + { + const int Ix = srcp[x+1] - srcp[x-1]; + const int Iy = srcpp[x] - srcp[x]; + x2[x] = ( Ix * Ix ) >> 1; + y2[x] = ( Iy * Iy ) >> 1; + xy[x] = ( Ix * Iy ) >> 1; + } + { + const int Ix = srcp[x] - srcp[x-1]; + const int Iy = srcpp[x] - srcp[x]; + x2[x] = ( Ix * Ix ) >> 1; + y2[x] = ( Iy * Iy ) >> 1; + xy[x] = ( Ix * Iy ) >> 1; + } +} + +/** + * Filters junctions and corners for the output image + * @param x2 Pointer to the x/x derivatives + * @param y2 Pointer to the y/y derivatives + * @param xy Pointer to the x/y derivatives + * @param pitch Stride of the source field plane from which the derivatives were 
calculated + * @param mskp Pointer to the edge direction mask + * @param msk_pitch Stride of mskp + * @param dstp Pointer to the output image being filtered in place + * @param dst_pitch Stride of dstp + * @param height Height of the full-frame output plane + * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch + * @param field Field to filter + */ +void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, int height, int width, int field ) +{ + mskp += ( 8 - field ) * msk_pitch; + dstp += ( 8 - field ) * dst_pitch; + unsigned char * dstpp = dstp - dst_pitch; + unsigned char * dstpn = dstp + dst_pitch; + x2 += pitch * 3; + y2 += pitch * 3; + xy += pitch * 3; + int *x2n = x2 + pitch; + int *y2n = y2 + pitch; + int *xyn = xy + pitch; + int x, y; + + for( y = 8 - field; y < height - 7; y += 2 ) + { + for( x = 4; x < width - 4; ++x ) + { + if( mskp[x] == 255 || mskp[x] == 128 ) continue; + const int c1 = (int)( x2[x] * y2[x] - xy[x] * xy[x] - 0.09 * + ( x2[x] + y2[x] ) * ( x2[x] + y2[x] ) ); + const int c2 = (int)( x2n[x] * y2n[x] - xyn[x]* xyn[x] - 0.09 * + ( x2n[x] + y2n[x] ) * ( x2n[x] + y2n[x] ) ); + if (c1 > 775 || c2 > 775) + dstp[x] = ( dstpp[x] + dstpn[x] + 1 ) >> 1; + } + mskp += msk_pitch * 2; + dstpp += dst_pitch * 2; + dstp += dst_pitch * 2; + dstpn += dst_pitch * 2; + x2 += pitch; + x2n += pitch; + y2 += pitch; + y2n += pitch; + xy += pitch; + xyn += pitch; + } +} diff --git a/libhb/eedi2.h b/libhb/eedi2.h new file mode 100644 index 000000000..1df7b1138 --- /dev/null +++ b/libhb/eedi2.h @@ -0,0 +1,84 @@ +// Used to order a sequeunce of metrics for median filtering +void eedi2_sort_metrics( int *order, const int length ); + +// Aping some Windows API funcctions AviSynth seems to like +// Taken from here: http://www.gidforums.com/t-8543.html +void *eedi2_aligned_malloc(size_t size, size_t align_size); +void eedi2_aligned_free(void *ptr); + +// 
Copies bitmaps +void eedi2_bit_blit( uint8_t * dstp, int dst_pitch, const uint8_t * srcp, int src_pitch, + int row_size, int height ); + +// Sets up the initial field-sized bitmap EEDI2 interpolates from +void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height ); + +// Simple line doubler +void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch ); + +// Finds places where vertically adjacent pixels abruptly change intensity +void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch, + int mthresh, int lthresh, int vthresh, int height, int width ); + +// Expands and smooths out the edge mask by considering a pixel +// to be masked if >= dilation threshold adjacent pixels are masked. +void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch, + int dstr, int height, int width ); + +// Contracts the edge mask by considering a pixel to be masked +// only if > erosion threshold adjacent pixels are masked +void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch, + int estr, int height, int width ); + +// Smooths out horizontally aligned holes in the mask +// If none of the 6 horizontally adjacent pixels are masked, +// don't consider the current pixel masked. If there are any +// masked on both sides, consider the current pixel masked. +void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, + int height, int width ); + +// Spatial vectors. Looks at maximum_search_distance surrounding pixels +// to guess which angle edges follow. This is EEDI2's timesink, and can be +// thought of as YADIF_CHECK on steroids. Both find edge directions. 
+void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch, + uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width ); + +void eedi2_filter_map( uint8_t *mskp, int msk_pitch, uint8_t *dmskp, int dmsk_pitch, + uint8_t * dstp, int dst_pitch, int height, int width ); + +void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, int height, int width ); + +void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t *dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, int height, int width ); + +void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, int tff, int height, int width ); + +void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, int field, int height, int width ); + +void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, int field, int height, int width ); + +void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, int field, int height, int width ); + +void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp, + int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt, + int height, int width ); + +void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch, uint8_t * dstp, + int src_pitch, int field, int height, int width ); + +void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst, + int dst_pitch, int height, int width ); + +void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch, + const int height, const int width ); + +void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int 
width, + int *x2, int *y2, int *xy); + +void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch, + uint8_t * dstp, int dst_pitch, int height, int width, int field ); |