Ports tritical's EEDI2 deinterlacing interpolator from AviSynth and implements it in the decomb filter as an optional, non-default substitute for yadif's internal, lower quality spatial predictions.

EEDI2 interpolation is enabled by setting mode (the first decomb parameter) to 5. Decomb now also accepts -1 as the spatial metric (the second parameter) to bypass combing detection and force deinterlacing of all frames in the specified mode. Additional parameters have been appended to the end of decomb's settings list to specify various settings used by EEDI2; their default values should work well.

git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@2264 b64f7644-9d1e-0410-96f1-a4d463321fa5
author: jbrjake <[email protected]> 2009-03-19 17:30:56 +0000
committer: jbrjake <[email protected]> 2009-03-19 17:30:56 +0000
commit: 24a14d05f1e1ddc42c9a0ccb9b0c4dfdc2ce29d2 (patch)
tree: 886a531b1f78e91bc05cb1f12e531da8fc880c6c /libhb
parent: a5e78ebff48244d281ec149c4f1b88cff328ef99 (diff)
3 files changed, 2432 insertions, 69 deletions
diff --git a/libhb/decomb.c b/libhb/decomb.c
index 4c0f42c17..bcf215f16 100644
--- a/libhb/decomb.c
+++ b/libhb/decomb.c
@@ -4,10 +4,15 @@
    Homepage: <http://handbrake.fr/>.
    It may be used under the terms of the GNU General Public License. 
    
-   The yadif algorithm was created by Michael Niedermayer. */
+   The yadif algorithm was created by Michael Niedermayer.
+   Tritical's work inspired much of the comb detection code:
+   http://web.missouri.edu/~kes25c/
+*/
+
 #include "hb.h"
 #include "hbffmpeg.h"
 #include "mpeg2dec/mpeg2.h"
+#include "eedi2.h"
 
 #define SUPPRESS_AV_LOG
 
@@ -21,17 +26,52 @@
 #define MIN3(a,b,c) MIN(MIN(a,b),c)
 #define MAX3(a,b,c) MAX(MAX(a,b),c)
 
-typedef struct yadif_arguments_s {
+// Some names to correspond to the pv->eedi_half array's contents
+#define SRCPF 0
+#define MSKPF 1
+#define TMPPF 2
+#define DSTPF 3
+// Some names to correspond to the pv->eedi_full array's contents
+#define DST2PF 0
+#define TMP2PF2 1
+#define MSK2PF 2
+#define TMP2PF 3
+#define DST2MPF 4
+
+struct yadif_arguments_s {
     uint8_t **dst;
     int parity;
     int tff;
     int stop;
     int is_combed;
-} yadif_arguments_t;
+};
+
+struct decomb_arguments_s {
+    int stop;
+};
 
-typedef struct decomb_arguments_s {
+struct eedi2_arguments_s {
     int stop;
-} decomb_arguments_t;
+};
+
+typedef struct yadif_arguments_s yadif_arguments_t;
+typedef struct decomb_arguments_s decomb_arguments_t;
+typedef struct eedi2_arguments_s eedi2_arguments_t;
+
+typedef struct eedi2_thread_arg_s { // start-up argument for one eedi2_filter_thread; the thread frees it on exit
+    hb_filter_private_t *pv;        // filter state shared with the thread
+    int plane;                      // plane the thread interpolates; also indexes its locks and arguments
+} eedi2_thread_arg_t;
+
+typedef struct decomb_thread_arg_s {
+    hb_filter_private_t *pv;
+    int segment;
+} decomb_thread_arg_t;
+
+typedef struct yadif_thread_arg_s {
+    hb_filter_private_t *pv;
+    int segment;
+} yadif_thread_arg_t;
 
 struct hb_filter_private_s
 {
@@ -39,6 +79,7 @@ struct hb_filter_private_s
     int              width[3];
     int              height[3];
 
+    // Decomb parameters
     int              mode;
     int              spatial_metric;
     int              motion_threshold;
@@ -46,8 +87,19 @@ struct hb_filter_private_s
     int              block_threshold;
     int              block_width;
     int              block_height;
+    
+    // EEDI2 parameters
+    int              magnitude_threshold;
+    int              variance_threshold;
+    int              laplacian_threshold;
+    int              dilation_threshold;
+    int              erosion_threshold;
+    int              noise_threshold;
+    int              maximum_search_distance;
+    int              post_processing;
 
     int              parity;
+    int              tff;
     
     int              yadif_ready;
 
@@ -70,6 +122,13 @@ struct hb_filter_private_s
     /* Make a buffer to store a comb mask. */
     uint8_t        * mask[3];
 
+    uint8_t        * eedi_half[4][3];
+    uint8_t        * eedi_full[5][3];
+    int            * cx2;
+    int            * cy2;
+    int            * cxy;
+    int            * tmpc;
+    
     AVPicture        pic_in;
     AVPicture        pic_out;
     hb_buffer_t *    buf_out[2];
@@ -86,6 +145,11 @@ struct hb_filter_private_s
     hb_lock_t      ** decomb_begin_lock;     // Thread has work
     hb_lock_t      ** decomb_complete_lock;  // Thread has completed work
     decomb_arguments_t *decomb_arguments;    // Arguments to thread for work
+
+    hb_thread_t    ** eedi2_threads;        // Threads for eedi2 - one per plane
+    hb_lock_t      ** eedi2_begin_lock;     // Thread has work
+    hb_lock_t      ** eedi2_complete_lock;  // Thread has completed work
+    eedi2_arguments_t *eedi2_arguments;    // Arguments to thread for work
     
 };
 
@@ -106,7 +170,7 @@ void hb_decomb_close( hb_filter_private_t * pv );
 hb_filter_object_t hb_filter_decomb =
 {
     FILTER_DECOMB,
-    "Deinterlaces selectively with yadif/mcdeint and lowpass5 blending",
+    "Decomb",
     NULL,
     hb_decomb_init,
     hb_decomb_work,
@@ -510,10 +574,171 @@ int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segm
     }
 }
 
-typedef struct decomb_thread_arg_s {
-    hb_filter_private_t *pv;
-    int segment;
-} decomb_thread_arg_t;
+// Runs the whole EEDI2 filter chain on plane k, starting from the half-height field in pv->eedi_half[SRCPF][k].
+// It outputs the final interpolated image to pv->eedi_full[DST2PF].
+void eedi2_interpolate_plane( hb_filter_private_t * pv, int k )
+{
+    /* Local aliases for plane k of every work buffer: the four half-height
+       planes in pv->eedi_half, the five full-height planes in pv->eedi_full,
+       and the int arrays handed to the derivative/corner post-processing. */
+    uint8_t * mskp = pv->eedi_half[MSKPF][k];
+    uint8_t * srcp = pv->eedi_half[SRCPF][k];
+    uint8_t * tmpp = pv->eedi_half[TMPPF][k];
+    uint8_t * dstp = pv->eedi_half[DSTPF][k];
+    uint8_t * dst2p = pv->eedi_full[DST2PF][k];
+    uint8_t * tmp2p2 = pv->eedi_full[TMP2PF2][k];
+    uint8_t * msk2p = pv->eedi_full[MSK2PF][k];
+    uint8_t * tmp2p = pv->eedi_full[TMP2PF][k];
+    uint8_t * dst2mp = pv->eedi_full[DST2MPF][k];
+    int * cx2 = pv->cx2;   // NOTE(review): cx2/cy2/cxy/tmpc are single arrays in pv, yet eedi2_planer wakes all three plane threads at once -- confirm they cannot clash
+    int * cy2 = pv->cy2;
+    int * cxy = pv->cxy;
+    int * tmpc = pv->tmpc; // only touched when post_processing is 2 or 3
+
+    int pitch = pv->ref_stride[k]; // the same stride is passed for every buffer below
+    int height = pv->height[k]; int width = pv->width[k];
+    int half_height = height / 2;  // row count given to the half-height (single field) stages
+
+    // edge mask (erode / dilate / erode, then close small gaps; mskp and tmpp swap roles each call)
+    eedi2_build_edge_mask( mskp, pitch, srcp, pitch,
+                     pv->magnitude_threshold, pv->variance_threshold, pv->laplacian_threshold, 
+                     half_height, width );
+    eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
+    eedi2_dilate_edge_mask( tmpp, pitch, mskp, pitch, pv->dilation_threshold, half_height, width );
+    eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
+    eedi2_remove_small_gaps( tmpp, pitch, mskp, pitch, half_height, width );
+
+    // direction mask, still at half height
+    eedi2_calc_directions( k, mskp, pitch, srcp, pitch, tmpp, pitch,
+                     pv->maximum_search_distance, pv->noise_threshold,
+                     half_height, width );
+    eedi2_filter_dir_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
+    eedi2_expand_dir_map( mskp, pitch, dstp, pitch, tmpp, pitch, half_height, width );
+    eedi2_filter_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
+
+    // upscale 2x vertically: source field, direction output and mask each go to a full-height buffer
+    eedi2_upscale_by_2( srcp, dst2p, half_height, pitch );
+    eedi2_upscale_by_2( dstp, tmp2p2, half_height, pitch );
+    eedi2_upscale_by_2( mskp, msk2p, half_height, pitch );
+
+    // upscale the direction mask (full-height stages take pv->tff and the full height)
+    eedi2_mark_directions_2x( msk2p, pitch, tmp2p2, pitch, tmp2p, pitch, pv->tff, height, width );
+    eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch,  dst2mp, pitch, pv->tff, height, width );
+    eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
+    eedi2_fill_gaps_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
+    eedi2_fill_gaps_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
+
+    // interpolate a full-size plane into dst2p
+    eedi2_interpolate_lattice( k, tmp2p, pitch, dst2p, pitch, tmp2p2, pitch, pv->tff,
+                         pv->noise_threshold, height, width );
+
+    if( pv->post_processing == 1 || pv->post_processing == 3 )
+    {
+        // make sure the edge directions are consistent
+        eedi2_bit_blit( tmp2p2, pitch, tmp2p, pitch, pv->width[k], pv->height[k] );
+        eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
+        eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
+        eedi2_post_process( tmp2p, pitch, tmp2p2, pitch, dst2p, pitch, pv->tff, height, width );
+    }
+    if( pv->post_processing == 2 || pv->post_processing == 3 )
+    {
+        // filter junctions and corners, using derivatives of the blurred half-height source
+        eedi2_gaussian_blur1( srcp, pitch, tmpp, pitch, srcp, pitch, half_height, width );
+        eedi2_calc_derivatives( srcp, pitch, half_height, width, cx2, cy2, cxy );
+        eedi2_gaussian_blur_sqrt2( cx2, tmpc, cx2, pitch, half_height, width);
+        eedi2_gaussian_blur_sqrt2( cy2, tmpc, cy2, pitch, half_height, width);
+        eedi2_gaussian_blur_sqrt2( cxy, tmpc, cxy, pitch, half_height, width);
+        eedi2_post_process_corner( cx2, cy2, cxy, pitch, tmp2p2, pitch, dst2p, pitch, height, width, pv->tff );
+    }
+}
+
+/*
+ *  Worker thread body: runs eedi2_interpolate_plane() on one plane each time its begin lock is released, until told to stop.
+ */
+void eedi2_filter_thread( void *thread_args_v )
+{
+    eedi2_arguments_t *eedi2_work = NULL;
+    hb_filter_private_t * pv;
+    int run = 1;
+    int plane;
+    eedi2_thread_arg_t *thread_args = thread_args_v;
+
+    pv = thread_args->pv;
+    plane = thread_args->plane;
+
+    hb_log("eedi2 thread started for plane %d", plane);
+
+    while( run )
+    {
+        /*
+         * Wait here until there is work to do. hb_lock() blocks until
+         * eedi2_planer() (or hb_decomb_close(), to stop us) releases the begin lock.
+         */
+        hb_lock( pv->eedi2_begin_lock[plane] );
+
+        eedi2_work = &pv->eedi2_arguments[plane];
+
+        if( eedi2_work->stop )
+        {
+            /*
+             * stop was set by hb_decomb_close(): leave the loop without touching the complete lock.
+             */
+            run = 0;
+            continue;
+        } 
+
+        /*
+         * Interpolate this thread's plane
+         */
+            eedi2_interpolate_plane( pv, plane );
+        
+        /*
+         * Finished this plane; release the complete lock that eedi2_planer() is waiting on.
+         */
+        hb_unlock( pv->eedi2_complete_lock[plane] );
+    }
+    free( thread_args_v ); // allocated for this thread in hb_decomb_init()
+}
+
+// Sets up the input field planes for EEDI2 in pv->eedi_half[SRCPF], then wakes
+// the three per-plane eedi2 threads and blocks until each has finished its plane.
+void eedi2_planer( hb_filter_private_t * pv )
+{
+    /* Hand each plane of pv->ref[1], starting at line 0 (tff) or line 1 (!tff), to the half-height fill helper. */
+    int i;
+    for( i = 0;  i < 3; i++ )
+    {
+        int pitch = pv->ref_stride[i];
+        int start_line = !pv->tff;
+        eedi2_fill_half_height_buffer_plane( &pv->ref[1][i][pitch*start_line], pv->eedi_half[SRCPF][i], pitch, pv->height[i] );
+    }
+    
+    int plane;
+    for( plane = 0; plane < 3; plane++ )
+    {  
+        /*
+         * Take the complete lock first so the second loop blocks on it,
+         * then release the begin lock to start this plane's thread.
+         * NOTE(review): the complete lock is released by the worker, not by
+         * this thread -- confirm hb_lock allows unlocking from another thread.
+         */
+        hb_lock( pv->eedi2_complete_lock[plane] );
+        hb_unlock( pv->eedi2_begin_lock[plane] );
+    }
+
+    /*
+     * Wait until all three threads have completed by trying to get
+     * the complete lock that we locked earlier for each thread, which
+     * will block until that thread has completed the work on that
+     * plane. All planes were started above, so they run side by side.
+     */
+    for( plane = 0; plane < 3; plane++ )
+    {
+        hb_lock( pv->eedi2_complete_lock[plane] );
+        hb_unlock( pv->eedi2_complete_lock[plane] ); // leave it free so the next call can take it again
+    }
+}
+
 
 /*
  * comb detect this segment of all three planes in a single thread.
@@ -626,10 +851,15 @@ static void yadif_filter_line( uint8_t *dst,
        to the other field in the current frame--the one not being filtered.  */
     uint8_t *prev2 = parity ? prev : cur ;
     uint8_t *next2 = parity ? cur  : next;
+    
     int w = pv->width[plane];
     int refs = pv->ref_stride[plane];
     int x;
+    int eedi2_mode = (pv->mode == 5);
     
+    /* We can replace spatial_pred with this interpolation*/
+    uint8_t * eedi2_guess = &pv->eedi_full[DST2PF][plane][y*refs];
+
     /* Decomb's cubic interpolation can only function when there are
        three samples above and below, so regress to yadif's traditional
        two-tap interpolation when filtering at the top and bottom edges. */
@@ -654,60 +884,69 @@ static void yadif_filter_line( uint8_t *dst,
         int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1;
         /* For the actual difference, use the largest of the previous average diffs. */
         int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
-        
-        /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
-        int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
-                                     ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
+
         int spatial_pred;
-         
-        /* Spatial pred is either a bilinear or cubic vertical interpolation. */
-        if( pv->mode > 0 && !edge)
+        
+        if( eedi2_mode )
         {
-            spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
+            /* Who needs yadif's spatial predictions when we can have EEDI2's? */
+            spatial_pred = eedi2_guess[0];
+            eedi2_guess++;
         }
-        else
+        else // Yadif spatial interpolation
         {
-            spatial_pred = (c+e)>>1;
+            /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
+            int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
+                                         ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
+            
+            /* Spatial pred is either a bilinear or cubic vertical interpolation. */
+            if( pv->mode > 0 && !edge)
+            {
+                spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
+            }
+            else
+            {
+                spatial_pred = (c+e)>>1;
+            }
+
+        /* EDDI: Edge Directed Deinterlacing Interpolation
+           Checks 4 different slopes to see if there is more similarity along a diagonal
+           than there was vertically. If a diagonal is more similar, then it indicates
+           an edge, so interpolate along that instead of a vertical line, using either
+           linear or cubic interpolation depending on mode. */
+        #define YADIF_CHECK(j)\
+                {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
+                              + ABS(cur[-refs  +j] - cur[+refs  -j])\
+                              + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
+                    if( score < spatial_score ){\
+                        spatial_score = score;\
+                        if( pv->mode > 0 && !edge )\
+                        {\
+                            switch(j)\
+                            {\
+                                case -1:\
+                                    spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
+                                break;\
+                                case -2:\
+                                    spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
+                                break;\
+                                case 1:\
+                                    spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
+                                break;\
+                                case 2:\
+                                    spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
+                                break;\
+                            }\
+                        }\
+                        else\
+                        {\
+                            spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
+                        }\
+
+            YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
+            YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
         }
 
-/* EDDI: Edge Directed Deinterlacing Interpolation
-   Uses the Martinez-Lim Line Shift Parametric Modeling algorithm...I think.
-   Checks 4 different slopes to see if there is more similarity along a diagonal
-   than there was vertically. If a diagonal is more similar, then it indicates
-   an edge, so interpolate along that instead of a vertical line, using either
-   linear or cubic interpolation depending on mode. */
-#define YADIF_CHECK(j)\
-        {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
-                      + ABS(cur[-refs  +j] - cur[+refs  -j])\
-                      + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
-            if( score < spatial_score ){\
-                spatial_score = score;\
-                if( pv->mode > 0 && !edge )\
-                {\
-                    switch(j)\
-                    {\
-                        case -1:\
-                            spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
-                        break;\
-                        case -2:\
-                            spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
-                        break;\
-                        case 1:\
-                            spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
-                        break;\
-                        case 2:\
-                            spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
-                        break;\
-                    }\
-                }\
-                else\
-                {\
-                    spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
-                }\
-                
-                YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
-                YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
-                                
         /* Temporally adjust the spatial prediction by
            comparing against lines in the adjacent fields. */
         int b = (prev2[-2*refs] + next2[-2*refs])>>1;
@@ -738,11 +977,6 @@ static void yadif_filter_line( uint8_t *dst,
     }
 }
 
-typedef struct yadif_thread_arg_s {
-    hb_filter_private_t *pv;
-    int segment;
-} yadif_thread_arg_t;
-
 /*
  * deinterlace this segment of all three planes in a single thread.
  */
@@ -902,9 +1136,9 @@ static void yadif_filter( uint8_t ** dst,
                           int tff,
                           hb_filter_private_t * pv )
 {
-    
-    int is_combed = comb_segmenter( pv );
-    
+    /* If we're running comb detection, do it now, otherwise blend if mode 4 and interpolate if not. */
+    int is_combed = pv->spatial_metric >= 0 ? comb_segmenter( pv ) : pv->mode == 4 ? 2 : 1;
+
     if( is_combed == 1 )
     {
         pv->yadif_deinterlaced_frames++;
@@ -918,6 +1152,12 @@ static void yadif_filter( uint8_t ** dst,
         pv->unfiltered_frames++;
     }
     
+    if( is_combed == 1 && pv->mode == 5 )
+    {
+        /* Generate an EEDI2 interpolation */
+        eedi2_planer( pv );
+    }
+    
     if( is_combed )
     {
         int segment;
@@ -1131,6 +1371,15 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
     pv->block_width = 16;
     pv->block_height = 16;
     
+    pv->magnitude_threshold = 10;
+    pv->variance_threshold = 20;
+    pv->laplacian_threshold = 20;
+    pv->dilation_threshold = 4;
+    pv->erosion_threshold = 2;
+    pv->noise_threshold = 50;
+    pv->maximum_search_distance = 24;
+    pv->post_processing = 1;
+
     pv->parity   = PARITY_DEFAULT;
 
     pv->mcdeint_mode   = MCDEINT_MODE_DEFAULT;
@@ -1138,14 +1387,22 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
 
     if( settings )
     {
-        sscanf( settings, "%d:%d:%d:%d:%d:%d:%d",
+        sscanf( settings, "%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
                 &pv->mode,
                 &pv->spatial_metric,
                 &pv->motion_threshold,
                 &pv->spatial_threshold,
                 &pv->block_threshold,
                 &pv->block_width,
-                &pv->block_height );
+                &pv->block_height,
+                &pv->magnitude_threshold,
+                &pv->variance_threshold,
+                &pv->laplacian_threshold,
+                &pv->dilation_threshold,
+                &pv->erosion_threshold,
+                &pv->noise_threshold,
+                &pv->maximum_search_distance,
+                &pv->post_processing );
     }
     
     pv->cpu_count = hb_get_cpu_count();
@@ -1181,7 +1438,38 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
 
         pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
     }
+    
+    if( pv->mode == 5 )
+    {
+        /* Allocate half-height eedi2 buffers */
+        height = pv->height[0] / 2;
+        for( i = 0; i < 3; i++ )
+        {
+            int is_chroma = !!i;
+            int w = ((width   + 31) & (~31))>>is_chroma;
+            int h = ((height+6+ 31) & (~31))>>is_chroma;
 
+            for( j = 0; j < 4; j++ )
+            {
+                pv->eedi_half[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
+            }
+        }
+
+        /* Allocate full-height eedi2 buffers */
+        height = pv->height[0];
+        for( i = 0; i < 3; i++ )
+        {
+            int is_chroma = !!i;
+            int w = ((width   + 31) & (~31))>>is_chroma;
+            int h = ((height+6+ 31) & (~31))>>is_chroma;
+
+            for( j = 0; j < 5; j++ )
+            {
+                pv->eedi_full[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
+            }
+        }
+    }
+    
      /*
       * Create yadif threads and locks.
       */
@@ -1264,7 +1552,62 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
             hb_error( "decomb could not create threads" );
         }
     }
+    
+    if( pv->mode == 5 )
+    {
+        /*
+         * Create eedi2 threads and locks.
+         */
+        pv->eedi2_threads = malloc( sizeof( hb_thread_t* ) * 3 );
+        pv->eedi2_begin_lock = malloc( sizeof( hb_lock_t * ) * 3 );
+        pv->eedi2_complete_lock = malloc( sizeof( hb_lock_t * ) * 3 );
+        pv->eedi2_arguments = malloc( sizeof( eedi2_arguments_t ) * 3 );
+
+        if( pv->post_processing > 1 )
+        {
+            pv->cx2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            pv->cy2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            pv->cxy = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            pv->tmpc = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            if( !pv->cx2 || !pv->cy2 || !pv->cxy || !pv->tmpc )
+                hb_log("EEDI2: failed to malloc derivative arrays");
+            else
+                hb_log("EEDI2: successfully mallloced derivative arrays");
+        }
+
+        for( i = 0; i < 3; i++ )
+        {
+            eedi2_thread_arg_t *eedi2_thread_args;
+
+            eedi2_thread_args = malloc( sizeof( eedi2_thread_arg_t ) );
+
+            if( eedi2_thread_args )
+            {
+                eedi2_thread_args->pv = pv;
+                eedi2_thread_args->plane = i;
+
+                pv->eedi2_begin_lock[i] = hb_lock_init();
+                pv->eedi2_complete_lock[i] = hb_lock_init();
+
+                /*
+                 * Important to start off with the threads locked waiting
+                 * on input.
+                 */
+                hb_lock( pv->eedi2_begin_lock[i] );
 
+                pv->eedi2_arguments[i].stop = 0;
+
+                pv->eedi2_threads[i] = hb_thread_init( "eedi2_filter_segment",
+                                                       eedi2_filter_thread,
+                                                       eedi2_thread_args,
+                                                       HB_NORMAL_PRIORITY );
+            }
+            else
+            {
+                hb_error( "eedi2 could not create threads" );
+            }
+        }
+    }
     
     
     /* Allocate mcdeint specific buffers */
@@ -1327,7 +1670,7 @@ void hb_decomb_close( hb_filter_private_t * pv )
         return;
     }
     
-    hb_log("decomb: yadif deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
+    hb_log("decomb: %s deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->mode == 5 ? "yadif+eedi2" : "yadif", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
 
     /* Cleanup frame buffers */
     if( pv->buf_out[0] )
@@ -1366,6 +1709,46 @@ void hb_decomb_close( hb_filter_private_t * pv )
         }
     }
     
+    if( pv->mode == 5 )
+    {
+        /* Cleanup eedi-half  buffers */
+        int j;
+        for( i = 0; i<3; i++ )
+        {
+            for( j = 0; j < 4; j++ )
+            {
+                uint8_t **p = &pv->eedi_half[j][i];
+                if (*p)
+                {
+                    free( *p - 3*pv->ref_stride[i] );
+                    *p = NULL;
+                }            
+            }
+        }
+
+        /* Cleanup eedi-full  buffers */
+        for( i = 0; i<3; i++ )
+        {
+            for( j = 0; j < 5; j++ )
+            {
+                uint8_t **p = &pv->eedi_full[j][i];
+                if (*p)
+                {
+                    free( *p - 3*pv->ref_stride[i] );
+                    *p = NULL;
+                }            
+            }
+        }
+    }
+    
+    if( pv->post_processing > 1  && pv->mode == 5 )
+    {
+        if (pv->cx2) eedi2_aligned_free(pv->cx2);
+        if (pv->cy2) eedi2_aligned_free(pv->cy2);
+        if (pv->cxy) eedi2_aligned_free(pv->cxy);
+        if (pv->tmpc) eedi2_aligned_free(pv->tmpc);
+    }
+    
     for( i = 0; i < pv->cpu_count; i++)
     {
         /*
@@ -1408,6 +1791,30 @@ void hb_decomb_close( hb_filter_private_t * pv )
     free( pv->decomb_complete_lock );
     free( pv->decomb_arguments );
     
+    if( pv->mode == 5 )
+    {
+        for( i = 0; i < 3; i++)
+        {
+            /*
+             * Tell each eedi2 thread to stop, and then cleanup.
+             */
+            pv->eedi2_arguments[i].stop = 1;
+            hb_unlock(  pv->eedi2_begin_lock[i] );
+
+            hb_thread_close( &pv->eedi2_threads[i] );
+            hb_lock_close( &pv->eedi2_begin_lock[i] );
+            hb_lock_close( &pv->eedi2_complete_lock[i] );
+        }
+
+        /*
+         * free memory for eedi2 structs
+         */
+        free( pv->eedi2_threads );
+        free( pv->eedi2_begin_lock );
+        free( pv->eedi2_complete_lock );
+        free( pv->eedi2_arguments );
+    }
+    
     /* Cleanup mcdeint specific buffers */
     if( pv->mcdeint_mode >= 0 )
     {
@@ -1456,6 +1863,8 @@ int hb_decomb_work( const hb_buffer_t * cbuf_in,
         tff = (pv->parity & 1) ^ 1;
     }
 
+    pv->tff = tff;
+    
     /* Store current frame in yadif cache */
     store_ref( (const uint8_t**)pv->pic_in.data, pv );
 
diff --git a/libhb/eedi2.c b/libhb/eedi2.c
new file mode 100644
index 000000000..2aa906ef0
--- /dev/null
+++ b/libhb/eedi2.c
@@ -0,0 +1,1870 @@
+/* $Id: eedi2.c,v 1.0 2009/03/06 5:00:00 jbrjake Exp $
+
+   This file is part of the HandBrake source code.
+   Homepage: <http://handbrake.fr/>.
+   It may be used under the terms of the GNU General Public License.
+   
+   The EEDI2 interpolator was created by tritical:
+   http://web.missouri.edu/~kes25c/
+*/
+
+#include "hb.h"
+#include "eedi2.h"
+
+/**
+ * EEDI2 directional limit lookup table
+ *
+ * These values are used to limit the range of edge direction searches and filtering.
+ *
+ * Within this file the table is indexed in two ways: directly by abs( direction )
+ * in eedi2_calc_directions(), and by abs( value - 128 ) >> 2 when the direction is
+ * read back from a stored direction-map byte (eedi2_filter_dir_map(),
+ * eedi2_expand_dir_map(), eedi2_mark_directions_2x()).
+ *
+ * The last two entries are -1. Where an entry is used unmodified as a limit,
+ * -1 makes every "abs( a - b ) <= lim" comparison fail.
+ */
+const int eedi2_limlut[33] __attribute__ ((aligned (16))) = { 
+                         6, 6, 7, 7, 8, 8, 9, 9, 9, 10,
+                         10, 11, 11, 12, 12, 12, 12, 12, 12, 12,
+                         12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+                         12, -1, -1 };
+
+/**
+ * Analog of _aligned_malloc
+ *
+ * Over-allocates with malloc(), advances to the next align_size boundary and
+ * stores, in the int immediately below the returned address, the distance back
+ * to the start of the underlying allocation. eedi2_aligned_free() reads that
+ * int, so the result must be released with eedi2_aligned_free(), not free().
+ *
+ * @param size Number of usable bytes requested
+ * @param align_size Alignment of the returned pointer, in bytes (must be a power of 2)
+ * @return The aligned pointer, or NULL if malloc() fails or the padded size would overflow
+ */
+void *eedi2_aligned_malloc( size_t size, size_t align_size )
+{
+    /* Keep the mask in size_t; narrowing it to int would truncate large alignments. */
+    const size_t align_mask = align_size - 1;
+    char * base, * aligned_ptr;
+
+    /* Worst case padding is one full alignment step plus the offset slot.
+       Refuse requests whose padded size would wrap around size_t. */
+    if( align_size > SIZE_MAX - sizeof( int ) ||
+        size > SIZE_MAX - ( align_size + sizeof( int ) ) )
+        return( NULL );
+
+    base = malloc( size + align_size + sizeof( int ) );
+    if( base == NULL )
+        return( NULL );
+
+    /* Leave room for the offset slot, then step up to the next boundary.
+       A full step is taken even when the address is already aligned. */
+    aligned_ptr = base + sizeof( int );
+    aligned_ptr += align_size - ( (size_t)aligned_ptr & align_mask );
+
+    /* Record how far back the real allocation starts. */
+    *( (int *)( aligned_ptr - sizeof( int ) ) ) = (int)( aligned_ptr - base );
+
+    return( aligned_ptr );
+}
+
+/**
+ * Analog of _aligned_free
+ *
+ * Reads the int stored immediately below the aligned address to find how far
+ * back the underlying malloc() block starts, then frees that block.
+ * Passing NULL is a no-op, mirroring free().
+ *
+ * @param ptr The aligned pointer, created with eedi2_aligned_malloc, to be freed (may be NULL)
+ */
+void eedi2_aligned_free( void *ptr )
+{
+    if( ptr == NULL )
+        return;
+
+    /* Distance, in bytes, from the malloc() result to the aligned address. */
+    const int offset = *( (int *)ptr - 1 );
+
+    /* Step back through a char pointer: arithmetic on void * is not ISO C. */
+    free( (char *)ptr - offset );
+}
+
+/**
+ * Sorts metrics for median filtering
+ *
+ * In-place ascending insertion sort; equal values keep their relative order.
+ *
+ * @param order Pointer to the table of values to sort
+ * @param length Number of entries in the order array
+ */
+void eedi2_sort_metrics( int *order, const int length )
+{
+    int pos;
+    for( pos = 1; pos < length; pos++ )
+    {
+        const int value = order[pos];
+        int slot;
+
+        /* Shift larger entries one place right until value's slot is found. */
+        for( slot = pos; slot > 0; slot-- )
+        {
+            if( order[slot-1] <= value )
+                break;
+            order[slot] = order[slot-1];
+        }
+        order[slot] = value;
+    }
+}
+
+/**
+ * Bitblits an image plane (overwrites one bitmap with another)
+ * @param dstp Pointer to destination bitmap
+ * @param dst_pitch Stride of destination bitmap
+ * @param srcp Pointer to source bitmap
+ * @param src_pitch Stride of source bitmap
+ * @param row_size Width of the bitmap being copied
+ * @param height Height of the source bitmap
+ *
+ * Nothing is copied when row_size or height is zero. A single row, or a plane
+ * whose row_size, dst_pitch, and src_pitch are all equal, is copied with one
+ * memcpy instead of line by line.
+ */
+void eedi2_bit_blit( uint8_t * dstp, int dst_pitch,
+                     const uint8_t * srcp, int src_pitch,
+                     int row_size, int height )
+{
+    int rows_left;
+
+    if( height == 0 || row_size == 0 )
+        return;
+
+    /* Rows are back to back in both bitmaps, so the plane is one solid run. */
+    const int contiguous = ( dst_pitch == src_pitch ) && ( src_pitch == row_size );
+
+    if( contiguous || height == 1 )
+    {
+        memcpy( dstp, srcp, row_size * height );
+        return;
+    }
+
+    for( rows_left = height; rows_left > 0; rows_left-- )
+    {
+        memcpy( dstp, srcp, row_size );
+        dstp += dst_pitch;
+        srcp += src_pitch;
+    }
+}
+
+/**
+ * A specialized variant of bit_blit, just for setting up the initial, field-sized bitmap planes that EEDI2 interpolates from.
+ *
+ * Copies every second line of src, beginning with the line src points at,
+ * into consecutive lines of dst. Each copy is a full pitch wide.
+ *
+ * @param src Pointer to source bitmap plane being copied from
+ * @param dst Pointer to the destination bitmap plane being copied to
+ * @param pitch Stride of both bitmaps
+ * @param height Height of the original, full-size src plane being copied from
+ */
+void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height )
+{
+    /* This function has no parity argument: it always starts at the line
+       src points to. Presumably the caller passes line 0 for TFF and
+       line 1 for BFF -- confirm at the call site. */
+    int remaining = height;
+    while( remaining > 0 )
+    {
+        memcpy( dst, src, pitch );
+        dst += pitch;
+        src += pitch * 2;
+        remaining -= 2;
+    }
+}
+
+/**
+ * A specialized variant of bit_blit, just for resizing the field-height maps EEDI2 generates to frame-height...a simple line doubler
+ *
+ * Every source line is written twice, to two consecutive destination lines.
+ *
+ * @param srcp Pointer to source bitmap plane being copied from
+ * @param dstp Pointer to the destination bitmap plane being copied to
+ * @param height Height of the input, half-size src plane being copied from
+ * @param pitch Stride of both bitmaps
+ */
+void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch )
+{
+    int row;
+    for( row = 0; row < height; row++ )
+    {
+        const uint8_t * line = srcp + row * pitch;
+        uint8_t * upper = dstp + ( 2 * row ) * pitch;
+
+        memcpy( upper, line, pitch );
+        memcpy( upper + pitch, line, pitch );
+    }
+}
+
+/**
+ * Finds places where vertically adjacent pixels abruptly change in intensity, i.e., sharp edges.
+ *
+ * The whole destination plane (height rows) is cleared to 0, then interior
+ * pixels that qualify as edges are set to 255. Border rows and columns are
+ * never marked.
+ *
+ * @param dstp Pointer to the destination bitmap
+ * @param dst_pitch Stride of dstp
+ * @param srcp Pointer to the source bitmap
+ * @param src_pitch Stride of srcp
+ * @param mthresh Magnitude threshold, ensures it doesn't mark edges on pixels that are too similar (10 is a good default value)
+ * @param lthresh Laplacian threshold, ensures edges are still prominent in the 2nd spatial derivative of the srcp plane (20 is a good default value)
+ * @param vthresh Variance threshold, ensures it doesn't look for edges in highly random pixel blocks (20 is a good default value)
+ * @param height Height of half-height single-field frame
+ * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
+ */
+void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch,
+                            int mthresh, int lthresh, int vthresh, int height, int width )
+{
+    int x, y;
+
+    /* Nothing to clear or scan; also keeps the memset size non-negative. */
+    if( height <= 0 )
+        return;
+
+    /* NOTE(review): the magnitude test below compares a squared gradient
+       against mthresh * 10, which only equals mthresh squared for the
+       default of 10 -- confirm the intended scaling. */
+    mthresh = mthresh * 10;
+    /* 81 = 9 * 9: the variance test works on 9 * sumsq - sum * sum over a 3x3 block. */
+    vthresh = vthresh * 81;
+
+    /* Clear every row of the mask. The loop below only ever writes 255, so
+       any row left uncleared would keep edge marks from an earlier call. */
+    memset( dstp, 0, height * dst_pitch );
+
+    srcp += src_pitch;
+    dstp += dst_pitch;
+    unsigned char *srcpp = srcp-src_pitch;
+    unsigned char *srcpn = srcp+src_pitch;
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width-1; ++x )
+        {
+            /* Skip flat areas: either this column, or both neighbouring
+               columns, barely change across the three rows. */
+            if( ( abs( srcpp[x]  -   srcp[x] ) < 10 &&
+                  abs(  srcp[x]  -  srcpn[x] ) < 10 &&
+                  abs( srcpp[x]  -  srcpn[x] ) < 10 )
+              ||
+                ( abs( srcpp[x-1] -  srcp[x-1] ) < 10 &&
+                  abs(  srcp[x-1] - srcpn[x-1] ) < 10 &&
+                  abs( srcpp[x-1] - srcpn[x-1] ) < 10 &&
+                  abs( srcpp[x+1] -  srcp[x+1] ) < 10 &&
+                  abs(  srcp[x+1] - srcpn[x+1] ) < 10 &&
+                  abs( srcpp[x+1] - srcpn[x+1] ) < 10) )
+                continue;
+
+            const int sum = srcpp[x-1] + srcpp[x] + srcpp[x+1] +
+                             srcp[x-1] +  srcp[x]+   srcp[x+1] +
+                            srcpn[x-1] + srcpn[x] + srcpn[x+1];
+
+            const int sumsq = srcpp[x-1] * srcpp[x-1] +
+                              srcpp[x]   * srcpp[x]   +
+                              srcpp[x+1] * srcpp[x+1] +
+                               srcp[x-1] *  srcp[x-1] +
+                               srcp[x]   *  srcp[x]   +
+                               srcp[x+1] *  srcp[x+1] +
+                              srcpn[x-1] * srcpn[x-1] +
+                              srcpn[x]   * srcpn[x]   +
+                              srcpn[x+1] * srcpn[x+1];
+
+            /* Variance test on the 3x3 neighbourhood. */
+            if( 9 * sumsq-sum * sum < vthresh )
+                continue;
+
+            /* Gradient magnitude test: horizontal difference, and the
+               largest of the three vertical differences. */
+            const int Ix = srcp[x+1] - srcp[x-1];
+            const int dpn = abs( srcpp[x] - srcpn[x] );
+            const int dpc = abs( srcpp[x] -  srcp[x] );
+            const int dcn = abs(  srcp[x] - srcpn[x] );
+            int Iy = dpn > dpc ? dpn : dpc;
+            if( dcn > Iy )
+                Iy = dcn;
+            if( Ix * Ix + Iy * Iy >= mthresh )
+            {
+                dstp[x] = 255;
+                continue;
+            }
+
+            /* Second-derivative (Laplacian) test. */
+            const int Ixx =  srcp[x-1] - 2 * srcp[x] +  srcp[x+1];
+            const int Iyy = srcpp[x]   - 2 * srcp[x] + srcpn[x];
+            if( abs( Ixx ) + abs( Iyy ) >= lthresh )
+                dstp[x] = 255;
+        }
+        dstp += dst_pitch;
+        srcpp += src_pitch;
+        srcp += src_pitch;
+        srcpn += src_pitch;
+    }
+}
+
+/**
+ * Expands and smooths out the edge mask
+ *
+ * dstp starts as a copy of mskp. Each interior pixel that is 0 in mskp is set
+ * to 0xFF in dstp when at least dstr of its 8 neighbours are 0xFF in mskp.
+ *
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the destination to store the dilated edge mask
+ * @param dst_pitch Stride of dstp
+ * @param dstr Dilation threshold, the minimum number of adjacent edge pixels in mskp needed to turn a non-edge pixel into an edge (4 is a good default value)
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                             int dstr, int height, int width )
+{
+    int x, y;
+
+    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        const unsigned char *above = mskp + ( y - 1 ) * msk_pitch;
+        const unsigned char *row   = mskp + y * msk_pitch;
+        const unsigned char *below = mskp + ( y + 1 ) * msk_pitch;
+        unsigned char *out = dstp + y * dst_pitch;
+
+        for( x = 1; x < width - 1; ++x )
+        {
+            /* Only pixels that are exactly 0 are candidates. */
+            if( row[x] != 0 )
+                continue;
+
+            const int neighbours =
+                ( above[x-1] == 0xFF ) + ( above[x] == 0xFF ) + ( above[x+1] == 0xFF ) +
+                (   row[x-1] == 0xFF ) +                        (   row[x+1] == 0xFF ) +
+                ( below[x-1] == 0xFF ) + ( below[x] == 0xFF ) + ( below[x+1] == 0xFF );
+
+            if( neighbours >= dstr )
+                out[x] = 0xFF;
+        }
+    }
+}
+
+/**
+ * Contracts the edge mask
+ *
+ * dstp starts as a copy of mskp. Each interior pixel that is 0xFF in mskp is
+ * cleared to 0 in dstp when fewer than estr of its 8 neighbours are 0xFF in mskp.
+ *
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the destination to store the eroded edge mask
+ * @param dst_pitch Stride of dstp
+ * @param estr Erosion threshold, ensures a pixel isn't retained as an edge in dstp if fewer than this number of adjacent pixels are also edges in mskp (2 is a good default value)
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                            int estr, int height, int width )
+{
+    int x, y;
+
+    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        const unsigned char *above = mskp + ( y - 1 ) * msk_pitch;
+        const unsigned char *row   = mskp + y * msk_pitch;
+        const unsigned char *below = mskp + ( y + 1 ) * msk_pitch;
+        unsigned char *out = dstp + y * dst_pitch;
+
+        for( x = 1; x < width - 1; ++x )
+        {
+            /* Only pixels that are exactly 0xFF are candidates. */
+            if( row[x] != 0xFF )
+                continue;
+
+            const int neighbours =
+                ( above[x-1] == 0xFF ) + ( above[x] == 0xFF ) + ( above[x+1] == 0xFF ) +
+                (   row[x-1] == 0xFF ) +                        (   row[x+1] == 0xFF ) +
+                ( below[x-1] == 0xFF ) + ( below[x] == 0xFF ) + ( below[x+1] == 0xFF );
+
+            if( neighbours < estr )
+                out[x] = 0;
+        }
+    }
+}
+
+/**
+ * Smooths out horizontally aligned holes in the mask
+ *
+ * dstp starts as a copy of mskp, then interior pixels (rows 1..height-2,
+ * columns 3..width-4) are adjusted using only their own row of mskp:
+ *
+ * If a pixel is set but none of the 3 pixels on either side of it are, it is cleared.
+ * If a pixel is clear but has a set pixel on both sides, with the two no more
+ * than 4 columns apart, it is set to 0xFF.
+ *
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the destination to store the smoothed edge mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch,
+                              int height, int width )
+{
+    int x, y;
+
+    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        const uint8_t * row = mskp + y * msk_pitch;
+        uint8_t * out = dstp + y * dst_pitch;
+
+        for( x = 3; x < width - 3; ++x )
+        {
+            const int l1 = row[x-1] != 0, l2 = row[x-2] != 0, l3 = row[x-3] != 0;
+            const int r1 = row[x+1] != 0, r2 = row[x+2] != 0, r3 = row[x+3] != 0;
+
+            if( row[x] )
+            {
+                /* Isolated mark: nothing set within 3 pixels on either side. */
+                if( !( l1 || l2 || l3 || r1 || r2 || r3 ) )
+                    out[x] = 0;
+            }
+            else if( ( r1 && ( l1 || l2 || l3 ) ) ||
+                     ( r2 && ( l1 || l2 ) ) ||
+                     ( r3 && l1 ) )
+            {
+                /* Small hole bracketed by marks on both sides. */
+                out[x] = 0xFF;
+            }
+        }
+    }
+}
+
+/**
+ * Calculates spatial direction vectors for the edges. This is EEDI2's timesink, and can be thought of as YADIF_CHECK on steroids, as both try to discern which angle a given edge follows
+ *
+ * Output encoding in dstp: 255 means no direction was computed for the pixel,
+ * 128 means a direction of zero (or no agreement between the candidate
+ * directions), and any other value is 128 + 4 * direction.
+ *
+ * @param plane The plane of the image being processed, to know to reduce maxd for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param srcp Pointer to the source image being filtered
+ * @param src_pitch Stride of srcp
+ * @param dstp Pointer to the destination to store the edge direction map
+ * @param dst_pitch Stride of dstp
+ * @param maxd Maximum pixel distance to search (24 is a good default value)
+ * @param nt Noise threshold (50 is a good default value)
+ * @param height Height of half-height field-sized frame
+ * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
+ */
+void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
+                            uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width  )
+{
+    int x, y, u, i;
+    
+    /* Start with every pixel flagged as "no direction" */
+    memset( dstp, 255, dst_pitch * height );
+    mskp += msk_pitch;
+    dstp += dst_pitch;
+    srcp += src_pitch;
+    /* Rows two above, one above, one below and two below the current row.
+       src2p and src2n are only dereferenced when y > 1 and y < height - 2. */
+    unsigned char *src2p = srcp - src_pitch * 2;
+    unsigned char *srcpp = srcp - src_pitch;
+    unsigned char *srcpn = srcp + src_pitch;
+    unsigned char *src2n = srcp + src_pitch * 2;
+    unsigned char *mskpp = mskp - msk_pitch;
+    unsigned char *mskpn = mskp + msk_pitch;
+    /* Halve the search distance for every plane other than plane 0 */
+    const int maxdt = plane == 0 ? maxd : ( maxd >> 1 );
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            /* Only process edge pixels that have an edge pixel directly left or right */
+            if( mskp[x] != 0xFF || ( mskp[x-1] != 0xFF && mskp[x+1] != 0xFF ) )
+                continue;
+            /* Clamp the search so that x-1+u and x+1+u stay inside the row.
+               NOTE(review): the mirrored offsets used below (x-1-u, x+1-u) are not
+               covered by this clamp and can fall outside [0, width) near the left
+               and right borders -- confirm the buffers carry enough padding. */
+            const int startu = MAX( -x + 1, -maxdt );
+            const int stopu = MIN( width - 2 - x, maxdt );
+            /* Initial bests: a candidate must beat both the noise-threshold cap
+               and the cost of the plain vertical match to be accepted */
+            int minb = MIN( 13 * nt,
+                            ( abs( srcp[x] - srcpn[x] ) +
+                              abs( srcp[x] - srcpp[x] ) ) * 6 );
+            int mina = MIN( 19 * nt,
+                            ( abs( srcp[x] - srcpn[x] ) +
+                              abs( srcp[x] - srcpp[x] ) ) * 9 );
+            int minc = mina;
+            int mind = minb;
+            int mine = minb;
+            /* -5000 marks "no direction found" for each of the five metrics */
+            int dira = -5000, dirb = -5000, dirc = -5000, dird = -5000, dire = -5000;
+            for( u = startu; u <= stopu; ++u )
+            {
+                /* Only test offset u if the mask has an edge near the matching
+                   position in the row above (always tested on the first row)... */
+                if( y == 1 ||
+                      mskpp[x-1+u] == 0xFF || mskpp[x+u] == 0xFF || mskpp[x+1+u] == 0xFF )
+                {
+                    /* ...and near the mirrored position in the row below
+                       (always tested on the last row) */
+                    if( y == height - 2 ||
+                        mskpn[x-1-u] == 0xFF || mskpn[x-u] == 0xFF || mskpn[x+1-u] == 0xFF )
+                    {
+                        /* 3-pixel sums of absolute differences between adjacent rows,
+                           shifted by +u in one direction and -u in the other */
+                        const int diffsn = abs(  srcp[x-1] - srcpn[x-1-u] ) +
+                                           abs(  srcp[x]   - srcpn[x-u] )   +
+                                           abs(  srcp[x+1] - srcpn[x+1-u] );
+
+                        const int diffsp = abs(  srcp[x-1] - srcpp[x-1+u] ) +
+                                           abs(  srcp[x]   - srcpp[x+u] )   +
+                                           abs(  srcp[x+1] - srcpp[x+1+u] );
+
+                        const int diffps = abs( srcpp[x-1] -  srcp[x-1-u] ) +
+                                           abs( srcpp[x]   -  srcp[x-u] )   +
+                                           abs( srcpp[x+1] -  srcp[x+1-u] );
+
+                        const int diffns = abs( srcpn[x-1] -  srcp[x-1+u] ) +
+                                           abs( srcpn[x]   -  srcp[x+u] )   +
+                                           abs( srcpn[x+1] -  srcp[x+1+u] );
+
+                        const int diff = diffsn + diffsp + diffps + diffns;
+                        int diffd = diffsp + diffns;
+                        int diffe = diffsn + diffps;
+                        if( diff < minb )
+                        {
+                            dirb = u;
+                            minb = diff;
+                        }
+                        /* Metric a additionally uses the row two above, when it exists */
+                        if( __builtin_expect( y > 1, 1) )
+                        {
+                            const int diff2pp = abs( src2p[x-1] - srcpp[x-1-u] ) +
+                                            abs( src2p[x]   - srcpp[x-u] )   +
+                                            abs( src2p[x+1] - srcpp[x+1-u] );
+                            const int diffp2p = abs( srcpp[x-1] - src2p[x-1+u] ) + 
+                                            abs( srcpp[x]   - src2p[x+u] )   + 
+                                            abs( srcpp[x+1] - src2p[x+1+u] );
+                            const int diffa = diff + diff2pp + diffp2p;
+                            diffd += diffp2p;
+                            diffe += diff2pp;
+                            if( diffa < mina )
+                            {
+                                dira = u;
+                                mina = diffa;
+                            }
+                        }
+                        /* Metric c additionally uses the row two below, when it exists */
+                        if( __builtin_expect( y < height-2, 1) )
+                        {
+                            const int diff2nn = abs( src2n[x-1] - srcpn[x-1+u] ) +
+                                                abs( src2n[x]   - srcpn[x+u] )   +
+                                                abs( src2n[x+1] - srcpn[x+1+u] );
+                            const int diffn2n = abs( srcpn[x-1] - src2n[x-1-u] ) +
+                                                abs( srcpn[x]   - src2n[x-u] )   +
+                                                abs( srcpn[x+1] - src2n[x+1-u] );
+                            const int diffc = diff + diff2nn + diffn2n;
+                            diffd += diff2nn;
+                            diffe += diffn2n;
+                            if( diffc < minc )
+                            {
+                                dirc = u;
+                                minc = diffc;
+                            }
+                        }
+                        if( diffd < mind )
+                        {
+                            dird = u;
+                            mind = diffd;
+                        }
+                        if( diffe < mine )
+                        {
+                            dire = u;
+                            mine = diffe;
+                        }
+                    }
+                }
+            }
+            /* Gather the directions that were found, take their median, and
+               average those lying within a limit of it */
+            int order[5], k=0;
+            if( dira != -5000 ) order[k++] = dira;
+            if( dirb != -5000 ) order[k++] = dirb;
+            if( dirc != -5000 ) order[k++] = dirc;
+            if( dird != -5000 ) order[k++] = dird;
+            if( dire != -5000 ) order[k++] = dire;
+            if( k > 1 )
+            {
+                eedi2_sort_metrics( order, k );
+                const int mid = ( k & 1 ) ? 
+                                    order[k>>1] :
+                                    ( order[(k-1)>>1] + order[k>>1] + 1 ) >> 1;
+                const int tlim = MAX( eedi2_limlut[abs(mid)] >> 2, 2 );
+                int sum = 0, count = 0;
+                for( i = 0; i < k; ++i )
+                {
+                    if( abs( order[i] - mid ) <= tlim )
+                    {
+                        ++count;
+                        sum += order[i];
+                    }
+                }
+                /* The average is truncated to an int before being scaled by 4 */
+                if( count > 1 ) 
+                    dstp[x] = 128 + ( (int)( (float)sum / (float)count ) * 4 );
+                else
+                    dstp[x] = 128;
+            }
+            else dstp[x] = 128;
+        }
+        mskpp += msk_pitch;
+        mskp += msk_pitch;
+        mskpn += msk_pitch;
+        src2p += src_pitch;
+        srcpp += src_pitch;
+        srcp += src_pitch;
+        srcpn += src_pitch;
+        src2n += src_pitch;
+        dstp += dst_pitch;
+    }
+}
+
+/**
+ * Filters the edge direction map, dropping directions that disagree with their surroundings
+ *
+ * dstp starts as a copy of dmskp. For each interior edge pixel that has a
+ * direction, the row above is scanned along the direction and the row below
+ * is scanned the opposite way; if both scans find an inconsistent value, the
+ * pixel is reset to 255 (no direction) in dstp.
+ *
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filtered edge direction mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_filter_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                       uint8_t * dstp, int dst_pitch, int height, int width )
+{
+    int x, y, j;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+    
+    mskp += msk_pitch;
+    dmskp += dmsk_pitch;
+    dstp += dst_pitch;
+    unsigned char *dmskpp = dmskp - dmsk_pitch;
+    unsigned char *dmskpn = dmskp + dmsk_pitch;
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            /* Skip pixels without a direction and pixels that are not edges */
+            if( dmskp[x] == 0xFF || mskp[x] != 0xFF )
+                continue;
+            /* Decode the stored byte back into a signed direction.
+               Note this right-shifts a possibly negative value. */
+            const int dir = ( dmskp[x] - 128 ) >> 2;
+            /* Largest tolerated difference between stored direction bytes */
+            const int lim = MAX( abs( dir ) * 2, 12 );
+            /* ict / icb: inconsistency found towards the top / bottom */
+            int ict = 0, icb = 0;
+            if( dir < 0 )
+            {
+                /* Scan the row above and the current row from x+dir up to x, clamped at column 0 */
+                const int dirt = MAX( -x, dir );
+                for( j = dirt; j <= 0; ++j )
+                {
+                    if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) ||
+                        ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) ||
+                        ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                    {
+                        ict = 1;
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                /* Same scan from x up to x+dir, clamped at the last column */
+                const int dirt = MIN( width - x - 1, dir );
+                for( j = 0; j <= dirt; ++j )
+                {
+                    if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) ||
+                        ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) ||
+                        ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                    {
+                        ict = 1;
+                        break;
+                    }
+                }
+            }
+            /* Only when the top side is inconsistent, scan the row below in
+               the opposite horizontal direction */
+            if( ict )
+            {
+                if( dir < 0 )
+                {
+                    const int dirt = MIN( width - x - 1, abs( dir ) );
+                    for( j = 0; j <= dirt; ++j )
+                    {
+                        if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) ||
+                            ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) ||
+                            ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                        {
+                            icb = 1;
+                            break;
+                        }
+                    }
+                }
+                else
+                {
+                    const int dirt = MAX( -x, -dir );
+                    for( j = dirt; j <= 0; ++j )
+                    {
+                        if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) ||
+                            ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) ||
+                            ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                        {
+                            icb = 1;
+                            break;
+                        }
+                    }
+                }
+                /* Inconsistent on both sides: drop this pixel's direction */
+                if( icb )
+                    dstp[x] = 255;
+            }
+        }
+        mskp += msk_pitch;
+        dmskpp += dmsk_pitch;
+        dmskp += dmsk_pitch;
+        dmskpn += dmsk_pitch;
+        dstp += dst_pitch;
+    }
+}
+
+
+/**
+ * Filters the edge direction mask
+ *
+ * dstp starts as a copy of dmskp. Each interior edge pixel is then replaced
+ * by a rounded average of the 3x3 neighbourhood directions that lie close to
+ * the neighbourhood median, or by 255 (no direction) when too few of them agree.
+ *
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filtered edge direction mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                           uint8_t * dstp, int dst_pitch, int height, int width )
+{
+    int x, y, i;
+    
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+    
+    dmskp += dmsk_pitch;
+    unsigned char *dmskpp = dmskp - dmsk_pitch;
+    unsigned char *dmskpn = dmskp + dmsk_pitch;
+    dstp += dst_pitch;
+    mskp += msk_pitch;
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            if( mskp[x] != 0xFF ) continue;
+            /* Collect every direction in the 3x3 block (centre included) that is not 255 */
+            int u = 0, order[9];
+            if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
+            if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
+            if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
+            if(  dmskp[x-1] != 0xFF ) order[u++] =  dmskp[x-1];
+            if(  dmskp[x]   != 0xFF ) order[u++] =  dmskp[x];
+            if(  dmskp[x+1] != 0xFF ) order[u++] =  dmskp[x+1];
+            if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
+            if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
+            if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
+            /* Fewer than 4 known directions: mark as no direction */
+            if( u < 4 )
+            {
+                dstp[x] = 255;
+                continue;
+            }
+            eedi2_sort_metrics( order, u );
+            /* Median; for an even count, the rounded-up mean of the two middle values */
+            const int mid = ( u & 1 ) ?
+                order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
+            int sum = 0, count = 0;
+            const int lim = eedi2_limlut[abs(mid-128)>>2];
+            for( i = 0; i < u; ++i )
+            {
+                if( abs( order[i] - mid ) <= lim )
+                {
+                    ++count;
+                    sum += order[i];
+                }
+            }
+            /* Require 4 agreeing values, or 5 when the centre itself has no direction */
+            if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) )
+            {
+                dstp[x] = 255;
+                continue;
+            }
+            /* Rounded average, with the median counted once more */
+            dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
+        }
+        dmskpp += dmsk_pitch;
+        dmskp += dmsk_pitch;
+        dmskpn += dmsk_pitch;
+        dstp += dst_pitch;
+        mskp += msk_pitch;
+    }
+}
+
+/**
+ * Smooths out the edge direction map
+ *
+ * dstp starts as a copy of dmskp. Interior edge pixels that have no direction
+ * (255) are given one when at least 5 of their 8 neighbours have a direction
+ * and at least 5 of those lie close to the neighbours' median. All other
+ * pixels keep the copied value.
+ *
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the expanded edge direction mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                           uint8_t * dstp, int dst_pitch, int height, int width )
+{
+    int x, y, i;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+    
+    dmskp += dmsk_pitch;
+    unsigned char *dmskpp = dmskp - dmsk_pitch;
+    unsigned char *dmskpn = dmskp + dmsk_pitch;
+    dstp += dst_pitch;
+    mskp += msk_pitch;
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            /* Only fill edge pixels that currently have no direction */
+            if( dmskp[x] != 0xFF || mskp[x] != 0xFF ) continue;
+            /* Collect the 8 neighbours' directions; the centre is known to be 255 */
+            int u = 0, order[9];
+            if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
+            if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
+            if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
+            if(  dmskp[x-1] != 0xFF ) order[u++] =  dmskp[x-1];
+            if(  dmskp[x+1] != 0xFF ) order[u++] =  dmskp[x+1];
+            if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
+            if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
+            if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
+            if( u < 5 ) continue;
+            eedi2_sort_metrics( order, u );
+            /* Median; for an even count, the rounded-up mean of the two middle values */
+            const int mid = ( u & 1 ) ?
+                order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
+            int sum = 0, count = 0;
+            const int lim = eedi2_limlut[abs(mid-128)>>2];
+            for( i = 0; i < u; ++i )
+            {
+                if( abs( order[i] - mid ) <= lim )
+                {
+                    ++count;
+                    sum += order[i];
+                }
+            }
+            if( count < 5 ) continue;
+            /* Rounded average, with the median counted once more */
+            dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
+        }
+        dmskpp += dmsk_pitch;
+        dmskp += dmsk_pitch;
+        dmskpn += dmsk_pitch;
+        dstp += dst_pitch;
+        mskp += msk_pitch;
+    }
+}
+
+/**
+ * Re-draws a clearer, less blocky frame-height edge direction mask
+ *
+ * dstp is first filled with 255 (no direction). Then every second row,
+ * starting at row 2 - tff, receives a direction derived from the dmskp rows
+ * directly above and below it, i.e. from rows of the opposite parity.
+ *
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the redrawn direction mask
+ * @param dst_pitch Stride of dstp
+ * @param tff Whether or not the frame parity is Top Field First
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                               uint8_t * dstp, int dst_pitch, int tff, int height, int width )
+{
+    int x, y, i;
+    memset( dstp, 255, dst_pitch * height );
+    /* dstp points at the row being written; mskp/dmskp at the row above it
+       and mskpn/dmskpn at the row below it. */
+    dstp  += dst_pitch  * ( 2 - tff );
+    dmskp += dmsk_pitch * ( 1 - tff );
+    mskp  += msk_pitch  * ( 1 - tff );
+    unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
+    unsigned char *mskpn = mskp + msk_pitch * 2;
+    for( y = 2 - tff; y < height - 1; y += 2 )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            /* Needs an edge in the row above or in the row below */
+            if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue;
+            /* Collect the known directions from the 3 pixels above and the 3 below */
+            int v = 0, order[6];
+            if(  dmskp[x-1] != 0xFF ) order[v++] = dmskp[x-1];
+            if(  dmskp[x]   != 0xFF ) order[v++] = dmskp[x];
+            if(  dmskp[x+1] != 0xFF ) order[v++] = dmskp[x+1];
+            if( dmskpn[x-1] != 0xFF ) order[v++] = dmskpn[x-1];
+            if( dmskpn[x]   != 0xFF ) order[v++] = dmskpn[x];
+            if( dmskpn[x+1] != 0xFF ) order[v++] = dmskpn[x+1];
+            if( v < 3 ) continue;
+            else
+            {
+                eedi2_sort_metrics( order, v );
+                /* Median; for an even count, the rounded-up mean of the two middle values */
+                const int mid = ( v & 1 ) ? order[v>>1] : ( order[(v-1)>>1] + order[v>>1]+1) >> 1;
+                const int lim = eedi2_limlut[abs(mid-128)>>2];
+                /* Count the columns whose upper and lower directions agree
+                   (a missing direction on either side counts as agreement) */
+                int u = 0;
+                if( abs( dmskp[x-1] - dmskpn[x-1] ) <= lim ||
+                    dmskp[x-1] == 0xFF || dmskpn[x-1] == 0xFF )
+                        ++u;
+                if( abs( dmskp[x]   - dmskpn[x] )   <= lim ||
+                    dmskp[x]   == 0xFF || dmskpn[x]   == 0xFF )
+                        ++u;
+                /* Compare column x+1 with column x+1, matching this check's
+                   own 0xFF guards and the two checks above. */
+                if( abs( dmskp[x+1] - dmskpn[x+1] ) <= lim ||
+                    dmskp[x+1] == 0xFF || dmskpn[x+1] == 0xFF)
+                        ++u;
+                if( u < 2 ) continue;
+                int count = 0, sum = 0;
+                for( i = 0; i < v; ++i )
+                {
+                    if( abs( order[i] - mid ) <= lim )
+                    {
+                        ++count;
+                        sum += order[i];
+                    }
+                }
+                /* At most two outliers, and at least two agreeing values */
+                if( count < v - 2 || count < 2 ) continue;
+                /* Rounded average, with the median counted once more */
+                dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
+            }
+        }
+        mskp += msk_pitch * 2;
+        mskpn += msk_pitch * 2;
+        dstp += dst_pitch * 2;
+        dmskp += dmsk_pitch * 2;
+        dmskpn += dmsk_pitch * 2;
+    }
+}
+
+/**
+ * Filters the frame-height edge direction mask.
+ *
+ * The destination starts as a copy of dmskp (via eedi2_bit_blit). For each line
+ * being interpolated, a pixel is processed when at least one of the two
+ * adjacent edge-mask lines is flagged (0xFF) at its column. It is then replaced
+ * by a trimmed average of the known directions in its 3x3 neighbourhood (lines
+ * two rows apart), or reset to 255 when too few neighbours are known or agree.
+ *
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filtered direction mask
+ * @param dst_pitch Stride of dstp
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                              uint8_t * dstp, int dst_pitch, int field, int height, int width )
+{
+    int row, col, k;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    uint8_t * dir_cur    = dmskp + dmsk_pitch * ( 2 - field );
+    uint8_t * dir_prev   = dir_cur - dmsk_pitch * 2;
+    uint8_t * dir_next   = dir_cur + dmsk_pitch * 2;
+    uint8_t * edge_above = mskp + msk_pitch * ( 1 - field );
+    uint8_t * edge_below = edge_above + msk_pitch * 2;
+    uint8_t * out        = dstp + dst_pitch * ( 2 - field );
+
+    for( row = 2 - field; row < height - 1; row += 2 )
+    {
+        /* The first and last processed lines lack one vertical neighbour. */
+        const int has_prev = row > 1;
+        const int has_next = row < height - 2;
+
+        for( col = 1; col < width - 1; ++col )
+        {
+            int known[9];
+            int n_known = 0;
+
+            if( edge_above[col] != 0xFF && edge_below[col] != 0xFF )
+                continue;
+
+            /* Collect every known direction from the 3x3 neighbourhood. */
+            if( has_prev )
+            {
+                for( k = -1; k <= 1; ++k )
+                    if( dir_prev[col+k] != 0xFF ) known[n_known++] = dir_prev[col+k];
+            }
+            for( k = -1; k <= 1; ++k )
+            {
+                if( dir_cur[col+k] != 0xFF ) known[n_known++] = dir_cur[col+k];
+            }
+            if( has_next )
+            {
+                for( k = -1; k <= 1; ++k )
+                    if( dir_next[col+k] != 0xFF ) known[n_known++] = dir_next[col+k];
+            }
+            if( n_known < 4 )
+            {
+                out[col] = 255;
+                continue;
+            }
+
+            eedi2_sort_metrics( known, n_known );
+            int median = known[n_known>>1];
+            if( !( n_known & 1 ) )
+                median = ( known[(n_known-1)>>1] + median + 1 ) >> 1;
+            const int tol = eedi2_limlut[abs( median - 128 ) >> 2];
+
+            /* Keep only the directions within the tolerance of the median. */
+            int kept = 0, total = 0;
+            for( k = 0; k < n_known; ++k )
+            {
+                if( abs( known[k] - median ) > tol )
+                    continue;
+                total += known[k];
+                ++kept;
+            }
+
+            /* A pixel without its own direction needs one more supporter. */
+            if( kept < 4 || ( kept < 5 && dir_cur[col] == 0xFF ) )
+                out[col] = 255;
+            else
+                out[col] = (int)( ( (float)( total + median ) / (float)( kept + 1 ) ) + 0.5f );
+        }
+        edge_above += msk_pitch * 2;
+        edge_below += msk_pitch * 2;
+        dir_prev   += dmsk_pitch * 2;
+        dir_cur    += dmsk_pitch * 2;
+        dir_next   += dmsk_pitch * 2;
+        out        += dst_pitch * 2;
+    }
+}
+
+/**
+ * Smooths out the frame-height edge direction mask by expanding it.
+ *
+ * The destination starts as a copy of dmskp (via eedi2_bit_blit). Only pixels
+ * whose direction is still unknown (0xFF) and for which at least one adjacent
+ * edge-mask line is flagged (0xFF) are touched: they receive a trimmed average
+ * of their eight neighbours' known directions, provided at least five
+ * neighbours are known and at least five lie close to the median.
+ *
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the expanded direction mask
+ * @param dst_pitch Stride of dstp
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                              uint8_t * dstp, int dst_pitch, int field, int height, int width )
+{
+    int row, col, k;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    uint8_t * dir_cur    = dmskp + dmsk_pitch * ( 2 - field );
+    uint8_t * dir_prev   = dir_cur - dmsk_pitch * 2;
+    uint8_t * dir_next   = dir_cur + dmsk_pitch * 2;
+    uint8_t * edge_above = mskp + msk_pitch * ( 1 - field );
+    uint8_t * edge_below = edge_above + msk_pitch * 2;
+    uint8_t * out        = dstp + dst_pitch * ( 2 - field );
+
+    for( row = 2 - field; row < height - 1; row += 2 )
+    {
+        /* The first and last processed lines lack one vertical neighbour. */
+        const int has_prev = row > 1;
+        const int has_next = row < height - 2;
+
+        for( col = 1; col < width - 1; ++col )
+        {
+            int known[9];
+            int n_known = 0;
+
+            /* Pixels that already have a direction are left alone. */
+            if( dir_cur[col] != 0xFF )
+                continue;
+            if( edge_above[col] != 0xFF && edge_below[col] != 0xFF )
+                continue;
+
+            /* Collect the known directions of the eight surrounding pixels. */
+            if( has_prev )
+            {
+                for( k = -1; k <= 1; ++k )
+                    if( dir_prev[col+k] != 0xFF ) known[n_known++] = dir_prev[col+k];
+            }
+            if( dir_cur[col-1] != 0xFF ) known[n_known++] = dir_cur[col-1];
+            if( dir_cur[col+1] != 0xFF ) known[n_known++] = dir_cur[col+1];
+            if( has_next )
+            {
+                for( k = -1; k <= 1; ++k )
+                    if( dir_next[col+k] != 0xFF ) known[n_known++] = dir_next[col+k];
+            }
+            if( n_known < 5 )
+                continue;
+
+            eedi2_sort_metrics( known, n_known );
+            int median = known[n_known>>1];
+            if( !( n_known & 1 ) )
+                median = ( known[(n_known-1)>>1] + median + 1 ) >> 1;
+            const int tol = eedi2_limlut[abs( median - 128 ) >> 2];
+
+            /* Keep only the directions within the tolerance of the median. */
+            int kept = 0, total = 0;
+            for( k = 0; k < n_known; ++k )
+            {
+                if( abs( known[k] - median ) > tol )
+                    continue;
+                total += known[k];
+                ++kept;
+            }
+            if( kept >= 5 )
+                out[col] = (int)( ( (float)( total + median ) / (float)( kept + 1 ) ) + 0.5f );
+        }
+        edge_above += msk_pitch * 2;
+        edge_below += msk_pitch * 2;
+        dir_prev   += dmsk_pitch * 2;
+        dir_cur    += dmsk_pitch * 2;
+        dir_next   += dmsk_pitch * 2;
+        out        += dst_pitch * 2;
+    }
+}
+
+/**
+ * Like the name suggests, this function fills in gaps in the frame-height edge direction mask.
+ *
+ * The destination starts as a copy of dmskp (via eedi2_bit_blit). For each pixel
+ * whose direction is unknown (0xFF) and for which at least one adjacent
+ * edge-mask line is flagged (0xFF), the current line is scanned left and right
+ * for the nearest known directions ("back" and "forward"). The lines two rows
+ * above and below are inspected over the same span to measure how much the
+ * direction varies there. When the two end directions are close enough, the
+ * run between them is written to dstp as a linear ramp from back to forward.
+ *
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filled-in direction mask
+ * @param dst_pitch Stride of dstp
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                         uint8_t * dstp, int dst_pitch, int field, int height, int width )
+{
+    int x, y, j;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    dmskp += dmsk_pitch * ( 2 - field );
+    unsigned char *dmskpp = dmskp - dmsk_pitch * 2; /* direction line two rows up */
+    unsigned char *dmskpn = dmskp + dmsk_pitch * 2; /* direction line two rows down */
+    mskp += msk_pitch * ( 1 - field );
+    unsigned char *mskpp = mskp - msk_pitch * 2; /* only dereferenced when y > 2 */
+    unsigned char *mskpn = mskp + msk_pitch * 2;
+    unsigned char *mskpnn = mskpn + msk_pitch * 2; /* only dereferenced when y < height - 3 */
+    dstp += dst_pitch * ( 2 - field );
+    for( y = 2 - field; y < height - 1; y += 2 )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            if( dmskp[x] != 0xFF || /* already has a direction */
+                ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue; /* neither adjacent mask line flagged */
+            int u = x - 1, back = 500, forward = -500; /* 500 / -500 mean "nothing found" */
+            while( u ) /* scan left; column 0 itself is never examined */
+            {
+                if( dmskp[u] != 0xFF ) 
+                { 
+                    back = dmskp[u]; 
+                    break; 
+                }
+                if( mskp[u] != 0xFF && mskpn[u] != 0xFF ) break; /* ran off the flagged run */
+                --u;
+            }
+            int v = x + 1;
+            while( v < width ) /* scan right for the nearest known direction */
+            {
+                if( dmskp[v] != 0xFF )
+                {
+                    forward = dmskp[v];
+                    break;
+                }
+                if( mskp[v] != 0xFF && mskpn[v] != 0xFF ) break; /* ran off the flagged run */
+                ++v;
+            }
+            int tc = 1, bc = 1; /* stay set while the line above / below is usable over [u, v] */
+            int mint = 500, maxt = -20; /* direction range seen in the line above */
+            int minb = 500, maxb = -20; /* direction range seen in the line below */
+            for( j = u; j <= v; ++j )
+            {
+                if( tc )
+                {
+                    if( y <= 2 || dmskpp[j] == 0xFF || ( mskpp[j] != 0xFF && mskp[j] != 0xFF ) )
+                    {
+                        tc = 0; /* line above unusable: stop tracking it */
+                        mint = maxt = 20; /* zero spread, so it adds nothing to thresh */
+                    }
+                    else
+                    {
+                        if( dmskpp[j] < mint ) mint = dmskpp[j];
+                        if( dmskpp[j] > maxt ) maxt = dmskpp[j];
+                    }
+                }
+                if( bc )
+                {
+                    if( y >= height - 3 || dmskpn[j] == 0xFF || ( mskpn[j] != 0xFF && mskpnn[j] != 0xFF ) )
+                    {
+                        bc = 0; /* line below unusable: stop tracking it */
+                        minb = maxb = 20; /* zero spread, so it adds nothing to thresh */
+                    }
+                    else
+                    {
+                        if( dmskpn[j] < minb ) minb = dmskpn[j];
+                        if( dmskpn[j] > maxb ) maxb = dmskpn[j];
+                    }
+                }
+            }
+            if( maxt == -20 ) maxt = mint = 20; /* range never updated: treat as zero spread */
+            if( maxb == -20 ) maxb = minb = 20; /* range never updated: treat as zero spread */
+            int thresh = MAX( /* largest end-to-end difference that may still be bridged */
+                            MAX( MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2, 8 ),
+                            MAX( abs( mint - maxt ), abs( minb - maxb ) ) );
+            const int flim = MIN( /* longest gap bridged without support from a neighbouring line */
+                                MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2,
+                                6 );
+            if( abs( forward - back ) <= thresh && ( v - u - 1 <= flim || tc || bc ) )
+            {
+                double step = (double)( forward - back ) / (double)( v - u );
+                for( j = 0; j < v - u - 1; ++j ) /* fill columns u+1 .. v-1 */
+                    dstp[u+j+1] = back + (int)( j * step + 0.5 );
+            }
+        }
+        mskpp += msk_pitch * 2;
+        mskp += msk_pitch * 2;
+        mskpn += msk_pitch * 2;
+        mskpnn += msk_pitch * 2;
+        dmskpp += dmsk_pitch * 2;
+        dmskp += dmsk_pitch * 2;
+        dmskpn += dmsk_pitch * 2;
+        dstp += dst_pitch * 2;
+    }
+}
+
+/**
+ * Actually renders the output frame, based on the edge and edge direction masks.
+ *
+ * For every missing line, each pixel is interpolated from the lines directly
+ * above and below it. Depending on the direction stored in dmskp, the pixel is
+ * either a plain vertical average or an average taken along the best-matching
+ * diagonal. dmskp is updated in place with the direction finally used:
+ * 128 encodes "no horizontal shift" and 255 "no direction".
+ *
+ * @param plane The plane of the image being processed, to know to reduce a search distance for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
+ * @param dmskp Pointer to the edge direction mask; read and rewritten in place
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the line-doubled source field being filtered in place
+ * @param dst_pitch Stride of dstp
+ * @param omskp Pointer to a direction mask that is only read here; candidate shifts must agree with it
+ * @param omsk_pitch Stride of omskp
+ * @param field Field to filter
+ * @param nt Noise threshold, (50 is a good default value)
+ * @param height Height of the full-frame output
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
+ */
+void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                                int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt,
+                                int height, int width )
+{
+    int x, y, u;
+
+    /* The outermost missing line has a neighbour on one side only, so it is
+       copied from the adjacent line instead of being interpolated. */
+    if( field == 1 )
+    {
+        eedi2_bit_blit( dstp + ( height - 1 ) * dst_pitch,
+                  dst_pitch,
+                  dstp + ( height - 2 ) * dst_pitch,
+                  dst_pitch,
+                  width,
+                  1 );
+    }
+    else
+    {
+        eedi2_bit_blit( dstp,
+                  dst_pitch,
+                  dstp + dst_pitch,
+                  dst_pitch,
+                  width,
+                  1 );
+    }
+
+    /* dstp / dstpnn are the existing lines above and below; dstpn is the
+       missing line being produced. */
+    dstp += dst_pitch * ( 1 - field );
+    omskp += omsk_pitch * ( 1 - field );
+    unsigned char *dstpn = dstp + dst_pitch;
+    unsigned char *dstpnn = dstp + dst_pitch * 2;
+    unsigned char *omskn = omskp + omsk_pitch * 2;
+    dmskp += dmsk_pitch * ( 2 - field );
+    for( y = 2 - field; y < height - 1; y += 2 )
+    {
+        /* NOTE(review): several reads below use x-1 / x+1 at x == 0 and
+           x == width - 1 and therefore touch bytes outside the visible row;
+           presumably the callers' buffers are padded - confirm. */
+        for( x = 0; x < width; ++x )
+        {
+            int dir = dmskp[x];
+            const int lim = eedi2_limlut[abs(dir-128)>>2];
+
+            /* No direction, or a direction that disagrees with both horizontal
+               neighbours: fall back to the vertical average. */
+            if( dir == 255 ||
+                ( abs( dmskp[x] - dmskp[x-1] ) > lim &&
+                  abs( dmskp[x] - dmskp[x+1] ) > lim ) )
+            {
+                dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+                if( dir != 255 ) dmskp[x] = 128;
+                continue;
+            }
+
+            /* Small tolerance and a nearly flat 3x2 neighbourhood (low
+               variance): a direction is meaningless, average vertically. */
+            if( lim < 9 )
+            {
+                const int sum =   dstp[x-1] +   dstp[x] +   dstp[x+1] +
+                                dstpnn[x-1] + dstpnn[x] + dstpnn[x+1];
+                const int sumsq = dstp[x-1] *   dstp[x-1] +
+                                  dstp[x]   *   dstp[x]   +
+                                  dstp[x+1] *   dstp[x+1] +
+                                dstpnn[x-1] * dstpnn[x-1] +
+                                dstpnn[x]   * dstpnn[x]   +
+                                dstpnn[x+1] * dstpnn[x+1];
+                if( 6 * sumsq - sum * sum < 576 )
+                {
+                    dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+                    dmskp[x] = 255;
+                    continue;
+                }
+            }
+
+            /* Both neighbouring lines have a local dip or a local peak at x:
+               average vertically. The column guard must cover BOTH
+               alternatives, because each of them reads x-2 and x+2; hence the
+               parentheses around the whole "dip || peak" expression. */
+            if( x > 1 && x < width - 2 &&
+                ( (   dstp[x] < MAX(   dstp[x-2],   dstp[x-1] ) - 3 &&
+                      dstp[x] < MAX(   dstp[x+2],   dstp[x+1] ) - 3 &&
+                    dstpnn[x] < MAX( dstpnn[x-2], dstpnn[x-1] ) - 3 &&
+                    dstpnn[x] < MAX( dstpnn[x+2], dstpnn[x+1] ) - 3 )
+                  ||
+                  (   dstp[x] > MIN(   dstp[x-2],   dstp[x-1] ) + 3 &&
+                      dstp[x] > MIN(   dstp[x+2],   dstp[x+1] ) + 3 &&
+                    dstpnn[x] > MIN( dstpnn[x-2], dstpnn[x-1] ) + 3 &&
+                    dstpnn[x] > MIN( dstpnn[x+2], dstpnn[x+1] ) + 3 ) ) )
+            {
+                dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+                dmskp[x] = 128;
+                continue;
+            }
+
+            /* Convert the stored direction into a pixel shift and search the
+               shifts within +/-2 of it, clamped by the distance to the row ends. */
+            dir = ( dir - 128 + 2 ) >> 2;
+            int val = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+            const int startu = ( dir - 2 < 0 ) ?
+                        MAX( -x + 1, MAX( dir - 2, -width + 2 + x ) )
+                        :
+                        MIN(  x - 1, MIN( dir - 2,  width - 2 - x ) );
+            const int stopu =  ( dir + 2 < 0 ) ?
+                        MAX( -x + 1, MAX( dir + 2, -width + 2 + x ) )
+                        :
+                        MIN(  x - 1, MIN( dir + 2,  width - 2 - x ) );
+            int min = 8 * nt;
+            for( u = startu; u <= stopu; ++u )
+            {
+                /* Matching cost of shift u between the lines above and below. */
+                const int diff =
+                    abs(   dstp[x-1] - dstpnn[x-u-1] ) +
+                    abs(   dstp[x]   - dstpnn[x-u] )   +
+                    abs(   dstp[x+1] - dstpnn[x-u+1] ) +
+                    abs( dstpnn[x-1] -   dstp[x+u-1] ) +
+                    abs( dstpnn[x]   -   dstp[x+u] )   +
+                    abs( dstpnn[x+1] -   dstp[x+u+1] );
+                /* The shift must beat the best cost so far, and the directions in
+                   omskp / omskn at the shifted positions must agree with ours. */
+                if( diff < min &&
+                    ( ( omskp[x-1+u] != 0xFF && abs( omskp[x-1+u] - dmskp[x] ) <= lim ) ||
+                     (  omskp[x+u]   != 0xFF && abs( omskp[x+u]   - dmskp[x]) <= lim )  ||
+                     (  omskp[x+1+u] != 0xFF && abs( omskp[x+1+u] - dmskp[x]) <= lim ) ) &&
+                    ( ( omskn[x-1-u] != 0xFF && abs( omskn[x-1-u] - dmskp[x]) <= lim ) ||
+                     (  omskn[x-u]   != 0xFF && abs( omskn[x-u]   - dmskp[x]) <= lim ) ||
+                     (  omskn[x+1-u] != 0xFF && abs( omskn[x+1-u] - dmskp[x]) <= lim ) ) )
+                {
+                    /* Second check at half the shift, where the samples used
+                       for the actual interpolation are located. */
+                    const int diff2 =
+                        abs( dstp[x+(u>>1)-1] - dstpnn[x-(u>>1)-1] ) +
+                        abs( dstp[x+(u>>1)]   - dstpnn[x-(u>>1)]   ) +
+                        abs( dstp[x+(u>>1)+1] - dstpnn[x-(u>>1)+1] );
+                    if( diff2 < 4 * nt &&
+                        ( ( ( abs( omskp[x+(u>>1)] - omskn[x-(u>>1)]     ) <= lim ||
+                              abs( omskp[x+(u>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) &&
+                            omskp[x+(u>>1)] != 0xFF )
+                          ||
+                          ( ( abs( omskp[x+((u+1)>>1)] - omskn[x-(u>>1)] )     <= lim ||
+                              abs( omskp[x+((u+1)>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) &&
+                            omskp[x+((u+1)>>1)] != 0xFF ) ) )
+                    {
+                        if( ( abs( dmskp[x] - omskp[x+(u>>1)] )     <= lim ||
+                              abs( dmskp[x] - omskp[x+((u+1)>>1)] ) <= lim ) &&
+                            ( abs( dmskp[x] - omskn[x-(u>>1)] )     <= lim ||
+                              abs( dmskp[x] - omskn[x-((u+1)>>1)] ) <= lim ) )
+                        {
+                            val = (   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)] +
+                                    dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)] + 2 ) >> 2;
+                            min = diff;
+                            dir = u;
+                        }
+                    }
+                }
+            }
+            if( min != 8 * nt )
+            {
+                /* A shift was accepted: store the pixel and the direction used. */
+                dstpn[x] = val;
+                dmskp[x] = 128 + dir * 4;
+            }
+            else
+            {
+                /* Nothing accepted: try a small unconstrained search (+/-4 for
+                   plane 0, +/-2 otherwise) and only accept values that lie
+                   between the two vertical neighbours. */
+                const int minm = MIN( dstp[x], dstpnn[x] );
+                const int maxm = MAX( dstp[x], dstpnn[x] );
+                const int d = plane == 0 ? 4 : 2;
+                const int startu = MAX( -x + 1, -d );
+                const int stopu = MIN( width - 2 - x, d );
+                min = 7 * nt;
+                for( u = startu; u <= stopu; ++u )
+                {
+                    const int p1 =   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)];
+                    const int p2 = dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)];
+                    const int diff =
+                        abs(   dstp[x-1] - dstpnn[x-u-1] ) +
+                        abs(   dstp[x]   - dstpnn[x-u] )   +
+                        abs(   dstp[x+1] - dstpnn[x-u+1] ) +
+                        abs( dstpnn[x-1] - dstp[x+u-1] )   +
+                        abs( dstpnn[x]   - dstp[x+u] )     +
+                        abs( dstpnn[x+1] - dstp[x+u+1] )   +
+                        abs( p1 - p2 );
+                    if( diff < min )
+                    {
+                        const int valt = ( p1 + p2 + 2 ) >> 2;
+                        if( valt >= minm && valt <= maxm )
+                        {
+                            val = valt;
+                            min = diff;
+                            dir = u;
+                        }
+                    }
+                }
+                dstpn[x] = val;
+                if( min == 7*nt ) dmskp[x] = 128;
+                else dmskp[x] = 128 + dir * 4;
+            }
+        }
+        dstp += dst_pitch * 2;
+        dstpn += dst_pitch * 2;
+        dstpnn += dst_pitch * 2;
+        dmskp += dmsk_pitch * 2;
+        omskp += omsk_pitch * 2;
+        omskn += omsk_pitch * 2;
+    }
+}
+
+/**
+ * Applies some extra filtering to smooth the edge direction mask.
+ *
+ * On every interpolated line, a pixel whose direction differs between the new
+ * and the old mask by more than the tolerance is replaced with the average of
+ * the pixels directly above and below it. Pixels whose old direction is 255 or
+ * 128 are never touched.
+ *
+ * @param nmskp Pointer to the newly-filtered edge direction mask being read from
+ * @param nmsk_pitch Stride of nmskp
+ * @param omskp Pointer to the old unfiltered edge direction mask being read from
+ * @param omsk_pitch Stride of omskp
+ * @param dstp Pointer to the output image being filtered in place
+ * @param src_pitch Stride of dstp (despite the name)
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in src_pitch
+ */
+void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch,
+                         uint8_t * dstp, int src_pitch, int field, int height, int width )
+{
+    int row, col;
+
+    for( row = 2 - field; row < height - 1; row += 2 )
+    {
+        const uint8_t * new_dir = nmskp + row * nmsk_pitch;
+        const uint8_t * old_dir = omskp + row * omsk_pitch;
+        uint8_t * cur = dstp + row * src_pitch;
+        const uint8_t * above = cur - src_pitch;
+        const uint8_t * below = cur + src_pitch;
+
+        for( col = 0; col < width; ++col )
+        {
+            const int before = old_dir[col];
+
+            /* Old directions of 255 and 128 are exempt from the correction. */
+            if( before == 255 || before == 128 )
+                continue;
+
+            const int tol = eedi2_limlut[abs( new_dir[col] - 128 ) >> 2];
+            if( abs( new_dir[col] - before ) > tol )
+                cur[col] = ( above[col] + below[col] + 1 ) >> 1;
+        }
+    }
+}
+
+/**
+ * Blurs the source field plane with a separable 7-tap kernel.
+ *
+ * A horizontal pass writes src into tmp, then a vertical pass writes tmp into
+ * dst. The tap weights are 291, 3539, 15862, 26152, 15862, 3539, 291 (sum
+ * 65536); results are rounded and shifted right by 16. Within three pixels of
+ * a border, the taps that would fall outside the image are folded onto their
+ * counterpart on the opposite side of the centre tap, which is why doubled
+ * weights (582, 7078, 31724) appear there.
+ *
+ * The code assumes width >= 6 and height >= 6; smaller planes are not handled.
+ *
+ * @param src Pointer to the half-height source field plane
+ * @param src_pitch Stride of src
+ * @param tmp Pointer to a temporary buffer for juggling bitmaps
+ * @param tmp_pitch Stride of tmp
+ * @param dst Pointer to the destination to store the blurred field plane
+ * @param dst_pitch Stride of dst
+ * @param height Height of the half-height field-sized frame
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
+ */
+void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst, int dst_pitch, int height, int width )
+{
+    uint8_t * srcp = src;
+    uint8_t * dstp = tmp;
+    int x, y;
+
+    /* Horizontal pass: src -> tmp. */
+    for( y = 0; y < height; ++y )
+    {
+        /* Three leftmost columns: missing left taps folded to the right. */
+        dstp[0] = ( srcp[3] * 582 + srcp[2] * 7078 + srcp[1] * 31724 +
+                    srcp[0] * 26152 + 32768 ) >> 16;
+        dstp[1] = ( srcp[4] * 582 + srcp[3] * 7078 +
+                    ( srcp[0] + srcp[2] ) * 15862 +
+                    srcp[1] * 26152 + 32768 ) >> 16;
+        dstp[2] = ( srcp[5] * 582 + ( srcp[0] + srcp[4] ) * 3539 +
+                    ( srcp[1] + srcp[3] ) * 15862 +
+                    srcp[2]*26152 + 32768 ) >> 16;
+        for( x = 3; x < width - 3; ++x )
+        {
+            dstp[x] = ( ( srcp[x-3] + srcp[x+3] ) * 291 +
+                        ( srcp[x-2] + srcp[x+2] ) * 3539 +
+                        ( srcp[x-1] + srcp[x+1] ) * 15862 +
+                        srcp[x] * 26152 + 32768 ) >> 16;
+        }
+        /* Three rightmost columns: missing right taps folded to the left. */
+        dstp[x] = ( srcp[x-3] * 582 + ( srcp[x-2] + srcp[x+2] ) * 3539 +
+                    ( srcp[x-1] + srcp[x+1] ) * 15862 +
+                    srcp[x]   * 26152 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
+                    ( srcp[x-1] + srcp[x+1] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
+                    srcp[x-1] * 31724 + srcp[x] * 26152 + 32768 ) >> 16;
+        srcp += src_pitch;
+        dstp += tmp_pitch;
+    }
+
+    /* Vertical pass: tmp -> dst. From here on every source row pointer walks
+       the tmp buffer and must therefore advance by tmp_pitch. */
+    srcp = tmp;
+    dstp = dst;
+    unsigned char *src3p = srcp - tmp_pitch * 3;
+    unsigned char *src2p = srcp - tmp_pitch * 2;
+    unsigned char *srcpp = srcp - tmp_pitch;
+    unsigned char *srcpn = srcp + tmp_pitch;
+    unsigned char *src2n = srcp + tmp_pitch * 2;
+    unsigned char *src3n = srcp + tmp_pitch * 3;
+
+    /* Row 0: no rows above. */
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 + srcpn[x] * 31724 +
+                     srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+
+    /* Row 1: one row above. */
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+
+    /* Row 2: two rows above. */
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src3n[x] * 582 + ( src2p[x] + src2n[x] ) * 3539 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    /* These pointers address tmp, so they step by tmp_pitch (stepping by
+       src_pitch here breaks every later row when the two strides differ). */
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+
+    /* Interior rows: the full symmetric kernel. */
+    for( y = 3; y < height - 3; ++y )
+    {
+        for( x = 0; x < width; ++x )
+        {
+            dstp[x] = ( ( src3p[x] + src3n[x] ) * 291 +
+                        ( src2p[x] + src2n[x] ) * 3539 +
+                        ( srcpp[x] + srcpn[x] ) * 15862 +
+                        srcp[x] * 26152 + 32768 ) >> 16;
+        }
+        src3p += tmp_pitch;
+        src2p += tmp_pitch;
+        srcpp += tmp_pitch;
+        srcp += tmp_pitch;
+        srcpn += tmp_pitch;
+        src2n += tmp_pitch;
+        src3n += tmp_pitch;
+        dstp += dst_pitch;
+    }
+
+    /* Row height-3: two rows below. */
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src3p[x] * 582 + ( src2p[x] + src2n[x] ) *3539 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+
+    /* Row height-2: one row below. */
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src3p[x] * 582 + src2p[x] * 7078 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                     srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+
+    /* Row height-1: no rows below. */
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src3p[x] * 582   + src2p[x] * 7078 +
+                    srcpp[x] * 31724 +  srcp[x] * 26152 + 32768 ) >> 16;
+    }
+}
+
+
+/**
+ * Blurs the spatial derivatives of the source field plane
+ * @param src Pointer to the derivative array to filter
+ * @param tmp Pointer to a temporary storage for the derivative array while it's being filtered
+ * @param dst Pointer to the destination to store the filtered output derivative array
+ * @param pitch Stride of the bitmap from which the src array is derived
+ * @param height Height of the half-height field-sized frame from which the src array derivs were taken
+ * @param width Width of the bitmap from which the src array is derived, as opposed to the padded stride in pitch
+ */
+void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch, int height, const int width )
+{
+    int * srcp = src;
+    int * dstp = tmp;
+    int x, y;
+    
+    for( y = 0; y < height; ++y )
+    {
+        x = 0;
+        dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902  + srcp[x+2] * 13618 +
+                    srcp[x+1] * 28830 + srcp[x]   * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902 + srcp[x+2] * 13618 + 
+                    ( srcp[x-1] + srcp[x+1] ) *14415 +
+                    srcp[x]   * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x]   * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x+4] * 678   + ( srcp[x-3] + srcp[x+3] ) * 1951 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x]   * 18508 + 32768 ) >> 16;
+
+        for( x = 4; x < width - 4; ++x )
+        {
+            dstp[x] = ( ( srcp[x-4] + srcp[x+4] ) * 339 + 
+                        ( srcp[x-3] + srcp[x+3] ) * 1951 + 
+                        ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                        ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                        srcp[x] * 18508 + 32768 ) >> 16;
+        }
+
+        dstp[x] = ( srcp[x-4] * 678 + ( srcp[x-3] + srcp[x+3] ) * 1951 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809  +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-4] * 678 + srcp[x+3] * 3902 + srcp[x-2] * 13618 + 
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + srcp[x-2] * 13618 + 
+                    srcp[x-1] * 28830 +
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        srcp += pitch;
+        dstp += pitch;
+    }
+    dstp = dst;
+    srcp = tmp;
+    int * src4p = srcp - pitch * 4;
+    int * src3p = srcp - pitch * 3;
+    int * src2p = srcp - pitch * 2;
+    int * srcpp = srcp - pitch;
+    int * srcpn = srcp + pitch;
+    int * src2n = srcp + pitch * 2;
+    int * src3n = srcp + pitch * 3;
+    int * src4n = srcp + pitch * 4;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678   + src3n[x] * 3902  + 
+                    src2n[x] * 13618 + srcpn[x] * 28830 +
+                     srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + src2n[x] * 13618 + 
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + 
+                    ( src2p[x] + src2n[x] ) * 6809 + 
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678 + ( src3p[x] + src3n[x] ) * 1951 +
+                    ( src2p[x] + src2n[x] ) * 6809 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( y = 4; y < height - 4; ++y )
+    {
+        for( x = 0; x < width; ++x )
+        {
+            dstp[x] = ( ( src4p[x] + src4n[x] ) * 339 +
+                        ( src3p[x] + src3n[x] ) * 1951 +
+                        ( src2p[x] + src2n[x] ) * 6809 +
+                        ( srcpp[x] + srcpn[x] ) * 14415 +
+                        srcp[x] * 18508 + 32768 ) >> 18;
+        }
+        src4p += pitch;
+        src3p += pitch;
+        src2p += pitch;
+        srcpp += pitch;
+        srcp += pitch;
+        srcpn += pitch;
+        src2n += pitch;
+        src3n += pitch;
+        src4n += pitch;
+        dstp += pitch;
+    }
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678 +
+                    ( src3p[x] + src3n[x] ) * 1951 +
+                    ( src2p[x] + src2n[x] ) * 6809 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 +
+                    ( src2p[x] + src2n[x] ) * 6809 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 + src2p[x] * 13618 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678   + src3p[x] * 3902 +
+                    src2p[x] * 13618 + srcpp[x] * 28830 +
+                    srcp[x]  * 18508 + 32768 ) >> 18;
+    }
+}
+
+/**
+ * Finds spatial derivatives for a source field plane: per pixel, the halved products Ix*Ix, Iy*Iy and Ix*Iy
+ * @param srcp Pointer to the plane to derive
+ * @param src_pitch Stride of srcp; x2, y2 and xy are advanced by this same stride for each row
+ * @param height Height of the half-height field-sized frame
+ * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
+ * @param x2 Pointer to the array to store the x/x derivatives
+ * @param y2 Pointer to the array to store the y/y derivatives
+ * @param xy Pointer to the array to store the x/y derivatives
+ */
+void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width, int *x2, int *y2, int *xy)
+{
+    // NOTE(review): srcp[1] and srcpn[0] are read unconditionally; this appears to assume width >= 2 and height >= 2 - confirm against callers
+    unsigned char * srcpp = srcp - src_pitch; // row above; not dereferenced until the pointers have stepped past row 0
+    unsigned char * srcpn = srcp + src_pitch; // row below; not dereferenced on the last row
+    int x, y;
+    { // Row 0, x = 0: one-sided differences (right minus current, current minus below)
+        const int Ix = srcp[1] -  srcp[0];
+        const int Iy = srcp[0] - srcpn[0];
+        x2[0] = ( Ix * Ix ) >> 1;
+        y2[0] = ( Iy * Iy ) >> 1;
+        xy[0] = ( Ix * Iy ) >> 1; // product can be negative; >> on a negative int is implementation-defined in C
+    }
+    for( x = 1; x < width - 1; ++x ) // Row 0, interior columns: horizontal difference spans both neighbours
+    {
+        const int Ix = srcp[x+1] -  srcp[x-1];
+        const int Iy = srcp[x]   - srcpn[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+    { // Row 0, last column: the loop above leaves x at width - 1 (for width >= 2); current minus left
+        const int Ix = srcp[x] -  srcp[x-1];
+        const int Iy = srcp[x] - srcpn[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+    srcpp += src_pitch; // step the three source rows and the three output arrays down one row
+    srcp += src_pitch;
+    srcpn += src_pitch;
+    x2 += src_pitch;
+    y2 += src_pitch;
+    xy += src_pitch;
+    for( y = 1; y < height - 1; ++y ) // Rows 1 .. height - 2: vertical difference is above minus below
+    {
+        { // Left edge: right minus current
+            const int Ix =  srcp[1] -  srcp[0];
+            const int Iy = srcpp[0] - srcpn[0];
+            x2[0] = ( Ix * Ix ) >> 1;
+            y2[0] = ( Iy * Iy ) >> 1;
+            xy[0] = ( Ix * Iy ) >> 1;
+        }
+        for ( x = 1; x < width - 1; ++x ) // Interior: right minus left
+        {
+            const int Ix =  srcp[x+1] -  srcp[x-1];
+            const int Iy = srcpp[x]   - srcpn[x];
+            x2[x] = ( Ix * Ix ) >> 1;
+            y2[x] = ( Iy * Iy ) >> 1;
+            xy[x] = ( Ix * Iy ) >> 1;
+        }
+        { // Right edge (x is width - 1 after the loop above): current minus left
+            const int Ix =  srcp[x] -  srcp[x-1];
+            const int Iy = srcpp[x] - srcpn[x];
+            x2[x] = ( Ix *Ix ) >> 1;
+            y2[x] = ( Iy *Iy ) >> 1;
+            xy[x] = ( Ix *Iy ) >> 1;
+        }
+        srcpp += src_pitch;
+        srcp += src_pitch;
+        srcpn += src_pitch;
+        x2 += src_pitch;
+        y2 += src_pitch;
+        xy += src_pitch;
+    }
+    { // Last row, x = 0: srcpn is not read here, so the vertical difference is above minus current
+        const int Ix =  srcp[1] - srcp[0];
+        const int Iy = srcpp[0] - srcp[0];
+        x2[0] = ( Ix * Ix ) >> 1;
+        y2[0] = ( Iy * Iy ) >> 1;
+        xy[0] = ( Ix * Iy ) >> 1;
+    }
+    for( x = 1; x < width - 1; ++x ) // Last row, interior columns
+    {
+        const int Ix =  srcp[x+1] - srcp[x-1];
+        const int Iy = srcpp[x]   - srcp[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+    { // Last row, last column
+        const int Ix =  srcp[x] - srcp[x-1];
+        const int Iy = srcpp[x] - srcp[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+}
+
+/**
+ * Filters junctions and corners for the output image: on every other row, pixels whose corner score exceeds 775 become the rounded mean of the pixels above and below
+ * @param x2 Pointer to the x/x derivatives
+ * @param y2 Pointer to the y/y derivatives
+ * @param xy Pointer to the x/y derivatives
+ * @param pitch Stride of the source field plane from which the derivatives were calculated
+ * @param mskp Pointer to the edge direction mask; pixels whose mask value is 255 or 128 are left untouched
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the output image being filtered in place
+ * @param dst_pitch Stride of dstp
+ * @param height Height of the full-frame output plane
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
+ * @param field Field to filter; processing starts at output row 8 - field
+ */
+void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, int height, int width, int field )
+{
+    mskp += ( 8 - field ) * msk_pitch; // mask and output both start at row 8 - field
+    dstp += ( 8 - field ) * dst_pitch;
+    unsigned char * dstpp = dstp - dst_pitch; // output row above the one being filtered
+    unsigned char * dstpn = dstp + dst_pitch; // output row below the one being filtered
+    x2 += pitch * 3; // derivative arrays start at their row 3
+    y2 += pitch * 3;
+    xy += pitch * 3;
+    int *x2n = x2 + pitch; // the following derivative row (row 4 at the start)
+    int *y2n = y2 + pitch;
+    int *xyn = xy + pitch;
+    int x, y;
+    // Each pass moves the mask/output pointers two rows but the derivative pointers only one row
+    for( y = 8 - field; y < height - 7; y += 2 )
+    {
+        for( x = 4; x < width - 4; ++x ) // the outermost 4 columns on each side are not touched
+        {
+            if( mskp[x] == 255 || mskp[x] == 128 ) continue; // NOTE(review): the meaning of 255/128 is set by the mask-building code, not visible here
+            const int c1 = (int)( x2[x]  *  y2[x] -  xy[x] * xy[x] - 0.09 *
+                                  ( x2[x]  + y2[x] )  * ( x2[x]  + y2[x] ) ); // det - 0.09 * trace^2 of [x2 xy; xy y2], the form of the Harris corner response; the 0.09 term is evaluated in double and the cast truncates
+            const int c2 = (int)( x2n[x] * y2n[x] - xyn[x]* xyn[x] - 0.09 * 
+                                  ( x2n[x] + y2n[x] ) * ( x2n[x] + y2n[x] ) ); // same measure on the following derivative row
+            if (c1 > 775 || c2 > 775) // either derivative row scores above the threshold
+                dstp[x] = ( dstpp[x] + dstpn[x] + 1 ) >> 1; // rounded mean of the vertical neighbours
+        }
+        mskp += msk_pitch * 2;
+        dstpp += dst_pitch * 2;
+        dstp += dst_pitch * 2;
+        dstpn += dst_pitch * 2;
+        x2 += pitch;
+        x2n += pitch;
+        y2 += pitch;
+        y2n += pitch;
+        xy += pitch;
+        xyn += pitch;
+    }
+}
diff --git a/libhb/eedi2.h b/libhb/eedi2.h
new file mode 100644
index 000000000..1df7b1138
--- /dev/null
+++ b/libhb/eedi2.h
@@ -0,0 +1,84 @@
+// Used to order a sequence of metrics for median filtering
+void eedi2_sort_metrics( int *order, const int length );
+
+// Aping some Windows API functions AviSynth seems to like
+// Taken from here: http://www.gidforums.com/t-8543.html
+void *eedi2_aligned_malloc(size_t size, size_t align_size);
+void eedi2_aligned_free(void *ptr);
+
+// Copies bitmaps
+void eedi2_bit_blit( uint8_t * dstp, int dst_pitch, const uint8_t * srcp, int src_pitch,
+                     int row_size, int height );
+
+// Sets up the initial field-sized bitmap EEDI2 interpolates from
+void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height );
+
+// Simple line doubler
+void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch );
+
+// Finds places where vertically adjacent pixels abruptly change intensity
+void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch,
+                            int mthresh, int lthresh, int vthresh, int height, int width );
+
+// Expands and smooths out the edge mask by considering a pixel
+// to be masked if >= dilation threshold adjacent pixels are masked.
+void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                             int dstr, int height, int width );
+
+// Contracts the edge mask by considering a pixel to be masked
+// only if > erosion threshold adjacent pixels are masked
+void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                            int estr, int height, int width );
+
+// Smooths out horizontally aligned holes in the mask
+// If none of the 6 horizontally adjacent pixels are masked,
+// don't consider the current pixel masked. If there are any
+// masked on both sides, consider the current pixel masked.
+void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, 
+                              int height, int width );
+
+// Spatial vectors. Looks at maximum_search_distance surrounding pixels
+// to guess which angle edges follow. This is EEDI2's timesink, and can be
+// thought of as YADIF_CHECK on steroids. Both find edge directions.
+void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
+                            uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width  );
+
+void eedi2_filter_map( uint8_t *mskp, int msk_pitch, uint8_t *dmskp, int dmsk_pitch,
+                       uint8_t * dstp, int dst_pitch, int height, int width );
+
+void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                           int dst_pitch, int height, int width );
+
+void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t  *dmskp, int dmsk_pitch, uint8_t * dstp,
+                           int dst_pitch, int height, int width );
+
+void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                               int dst_pitch, int tff, int height, int width );
+
+void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                              int dst_pitch, int field, int height, int width );
+
+void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                              int dst_pitch, int field, int height, int width );
+
+void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                         int dst_pitch, int field, int height, int width );
+
+void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                                int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt,
+                                int height, int width );
+
+void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch, uint8_t * dstp,
+                         int src_pitch, int field, int height, int width );
+
+void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst,
+                           int dst_pitch, int height, int width );
+                           
+void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch,
+                                const int height, const int width );
+                                
+void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width,
+                             int *x2, int *y2, int *xy);
+
+void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch,
+                                uint8_t * dstp, int dst_pitch, int height, int width, int field );
author	jbrjake <[email protected]>	2009-03-19 17:30:56 +0000
committer	jbrjake <[email protected]>	2009-03-19 17:30:56 +0000
commit	24a14d05f1e1ddc42c9a0ccb9b0c4dfdc2ce29d2 (patch)
tree	886a531b1f78e91bc05cb1f12e531da8fc880c6c /libhb
parent	a5e78ebff48244d281ec149c4f1b88cff328ef99 (diff)