3 files changed, 2432 insertions, 69 deletions
diff --git a/libhb/decomb.c b/libhb/decomb.c
index 4c0f42c17..bcf215f16 100644
--- a/libhb/decomb.c
+++ b/libhb/decomb.c
@@ -4,10 +4,15 @@
    Homepage: <http://handbrake.fr/>.
    It may be used under the terms of the GNU General Public License. 
    
-   The yadif algorithm was created by Michael Niedermayer. */
+   The yadif algorithm was created by Michael Niedermayer.
+   Tritical's work inspired much of the comb detection code:
+   http://web.missouri.edu/~kes25c/
+*/
+
 #include "hb.h"
 #include "hbffmpeg.h"
 #include "mpeg2dec/mpeg2.h"
+#include "eedi2.h"
 
 #define SUPPRESS_AV_LOG
 
@@ -21,17 +26,52 @@
 #define MIN3(a,b,c) MIN(MIN(a,b),c)
 #define MAX3(a,b,c) MAX(MAX(a,b),c)
 
-typedef struct yadif_arguments_s {
+// Some names to correspond to the pv->eedi_half array's contents
+#define SRCPF 0
+#define MSKPF 1
+#define TMPPF 2
+#define DSTPF 3
+// Some names to correspond to the pv->eedi_full array's contents
+#define DST2PF 0
+#define TMP2PF2 1
+#define MSK2PF 2
+#define TMP2PF 3
+#define DST2MPF 4
+
+struct yadif_arguments_s {
     uint8_t **dst;
     int parity;
     int tff;
     int stop;
     int is_combed;
-} yadif_arguments_t;
+};
+
+struct decomb_arguments_s {
+    int stop;
+};
 
-typedef struct decomb_arguments_s {
+struct eedi2_arguments_s {
     int stop;
-} decomb_arguments_t;
+};
+
+typedef struct yadif_arguments_s yadif_arguments_t;
+typedef struct decomb_arguments_s decomb_arguments_t;
+typedef struct eedi2_arguments_s eedi2_arguments_t;
+
+typedef struct eedi2_thread_arg_s {
+    hb_filter_private_t *pv; // filter state handed to the eedi2 worker thread
+    int plane;               // index of the plane that worker interpolates
+} eedi2_thread_arg_t;
+
+typedef struct decomb_thread_arg_s {
+    hb_filter_private_t *pv; // filter private state passed to the thread
+    int segment;             // presumably the frame segment this thread scans -- confirm
+} decomb_thread_arg_t;
+
+typedef struct yadif_thread_arg_s {
+    hb_filter_private_t *pv; // filter private state passed to the thread
+    int segment;             // presumably the frame segment this thread filters -- confirm
+} yadif_thread_arg_t;
 
 struct hb_filter_private_s
 {
@@ -39,6 +79,7 @@ struct hb_filter_private_s
     int              width[3];
     int              height[3];
 
+    // Decomb parameters
     int              mode;
     int              spatial_metric;
     int              motion_threshold;
@@ -46,8 +87,19 @@ struct hb_filter_private_s
     int              block_threshold;
     int              block_width;
     int              block_height;
+    
+    // EEDI2 parameters
+    int              magnitude_threshold;
+    int              variance_threshold;
+    int              laplacian_threshold;
+    int              dilation_threshold;
+    int              erosion_threshold;
+    int              noise_threshold;
+    int              maximum_search_distance;
+    int              post_processing;
 
     int              parity;
+    int              tff;
     
     int              yadif_ready;
 
@@ -70,6 +122,13 @@ struct hb_filter_private_s
     /* Make a buffer to store a comb mask. */
     uint8_t        * mask[3];
 
+    uint8_t        * eedi_half[4][3];
+    uint8_t        * eedi_full[5][3];
+    int            * cx2;
+    int            * cy2;
+    int            * cxy;
+    int            * tmpc;
+    
     AVPicture        pic_in;
     AVPicture        pic_out;
     hb_buffer_t *    buf_out[2];
@@ -86,6 +145,11 @@ struct hb_filter_private_s
     hb_lock_t      ** decomb_begin_lock;     // Thread has work
     hb_lock_t      ** decomb_complete_lock;  // Thread has completed work
     decomb_arguments_t *decomb_arguments;    // Arguments to thread for work
+
+    hb_thread_t    ** eedi2_threads;        // Threads for eedi2 - one per plane
+    hb_lock_t      ** eedi2_begin_lock;     // Thread has work
+    hb_lock_t      ** eedi2_complete_lock;  // Thread has completed work
+    eedi2_arguments_t *eedi2_arguments;    // Arguments to thread for work
     
 };
 
@@ -106,7 +170,7 @@ void hb_decomb_close( hb_filter_private_t * pv );
 hb_filter_object_t hb_filter_decomb =
 {
     FILTER_DECOMB,
-    "Deinterlaces selectively with yadif/mcdeint and lowpass5 blending",
+    "Decomb",
     NULL,
     hb_decomb_init,
     hb_decomb_work,
@@ -510,10 +574,171 @@ int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segm
     }
 }
 
-typedef struct decomb_thread_arg_s {
-    hb_filter_private_t *pv;
-    int segment;
-} decomb_thread_arg_t;
+// Runs the whole EEDI2 filter chain, in order, on plane k, reading the field in
+// pv->eedi_half[SRCPF] and leaving the interpolated plane in pv->eedi_full[DST2PF].
+void eedi2_interpolate_plane( hb_filter_private_t * pv, int k )
+{
+    /* Nine byte-plane scratch pointers plus four int derivative arrays.
+       NOTE(review): cx2/cy2/cxy/tmpc are a single set in pv, yet eedi2_planer
+       wakes all three plane threads together -- looks racy; confirm.        */
+    uint8_t * mskp = pv->eedi_half[MSKPF][k];
+    uint8_t * srcp = pv->eedi_half[SRCPF][k];
+    uint8_t * tmpp = pv->eedi_half[TMPPF][k];
+    uint8_t * dstp = pv->eedi_half[DSTPF][k];
+    uint8_t * dst2p = pv->eedi_full[DST2PF][k];
+    uint8_t * tmp2p2 = pv->eedi_full[TMP2PF2][k];
+    uint8_t * msk2p = pv->eedi_full[MSK2PF][k];
+    uint8_t * tmp2p = pv->eedi_full[TMP2PF][k];
+    uint8_t * dst2mp = pv->eedi_full[DST2MPF][k];
+    int * cx2 = pv->cx2;
+    int * cy2 = pv->cy2;
+    int * cxy = pv->cxy;
+    int * tmpc = pv->tmpc;
+
+    int pitch = pv->ref_stride[k];
+    int height = pv->height[k]; int width = pv->width[k];
+    int half_height = height / 2; // line count passed for the half-height buffers
+
+    // edge mask: build it, then erode / dilate / erode and remove small gaps
+    eedi2_build_edge_mask( mskp, pitch, srcp, pitch,
+                     pv->magnitude_threshold, pv->variance_threshold, pv->laplacian_threshold,
+                     half_height, width );
+    eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
+    eedi2_dilate_edge_mask( tmpp, pitch, mskp, pitch, pv->dilation_threshold, half_height, width );
+    eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
+    eedi2_remove_small_gaps( tmpp, pitch, mskp, pitch, half_height, width );
+
+    // direction mask
+    eedi2_calc_directions( k, mskp, pitch, srcp, pitch, tmpp, pitch,
+                     pv->maximum_search_distance, pv->noise_threshold,
+                     half_height, width );
+    eedi2_filter_dir_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
+    eedi2_expand_dir_map( mskp, pitch, dstp, pitch, tmpp, pitch, half_height, width );
+    eedi2_filter_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
+
+    // upscale 2x vertically
+    eedi2_upscale_by_2( srcp, dst2p, half_height, pitch );
+    eedi2_upscale_by_2( dstp, tmp2p2, half_height, pitch );
+    eedi2_upscale_by_2( mskp, msk2p, half_height, pitch );
+
+    // upscale the direction mask
+    eedi2_mark_directions_2x( msk2p, pitch, tmp2p2, pitch, tmp2p, pitch, pv->tff, height, width );
+    eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch,  dst2mp, pitch, pv->tff, height, width );
+    eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
+    eedi2_fill_gaps_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
+    eedi2_fill_gaps_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
+
+    // interpolate a full-size plane
+    eedi2_interpolate_lattice( k, tmp2p, pitch, dst2p, pitch, tmp2p2, pitch, pv->tff,
+                         pv->noise_threshold, height, width );
+
+    if( pv->post_processing == 1 || pv->post_processing == 3 )
+    {
+        // make sure the edge directions are consistent
+        eedi2_bit_blit( tmp2p2, pitch, tmp2p, pitch, width, height );
+        eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
+        eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
+        eedi2_post_process( tmp2p, pitch, tmp2p2, pitch, dst2p, pitch, pv->tff, height, width );
+    }
+    if( ( pv->post_processing == 2 || pv->post_processing == 3 ) && cx2 && cy2 && cxy && tmpc )
+    {
+        // filter junctions and corners; skipped when any of the int arrays is NULL
+        eedi2_gaussian_blur1( srcp, pitch, tmpp, pitch, srcp, pitch, half_height, width );
+        eedi2_calc_derivatives( srcp, pitch, half_height, width, cx2, cy2, cxy );
+        eedi2_gaussian_blur_sqrt2( cx2, tmpc, cx2, pitch, half_height, width);
+        eedi2_gaussian_blur_sqrt2( cy2, tmpc, cy2, pitch, half_height, width);
+        eedi2_gaussian_blur_sqrt2( cxy, tmpc, cxy, pitch, half_height, width);
+        eedi2_post_process_corner( cx2, cy2, cxy, pitch, tmp2p2, pitch, dst2p, pitch, height, width, pv->tff );
+    }
+}
+
+/*
+ *  Worker loop for one plane: each wake-up runs a full eedi2 interpolation.
+ *  thread_args_v is an eedi2_thread_arg_t naming the filter state and plane.
+ */
+void eedi2_filter_thread( void *thread_args_v )
+{
+    eedi2_thread_arg_t *args = thread_args_v;
+    hb_filter_private_t *pv = args->pv;
+    const int plane = args->plane;
+
+    hb_log("eedi2 thread started for plane %d", plane);
+
+    /* Serve jobs until a stop request arrives. */
+    for( ;; )
+    {
+        /*
+         * Block on this plane's begin lock; eedi2_planer releases it when
+         * a job is ready, and hb_decomb_close releases it to stop us.
+         */
+        hb_lock( pv->eedi2_begin_lock[plane] );
+
+        /*
+         * Check our argument slot only after waking. On a stop request,
+         * leave the loop without touching the complete lock.
+         */
+        if( pv->eedi2_arguments[plane].stop )
+        {
+            break;
+        }
+
+        /*
+         * Interpolate the plane assigned to this thread.
+         */
+        eedi2_interpolate_plane( pv, plane );
+
+        /*
+         * Signal completion by releasing this plane's complete lock.
+         */
+        hb_unlock( pv->eedi2_complete_lock[plane] );
+    }
+
+    /*
+     * Release the argument block that was handed to this thread at
+     * start-up; nothing else is cleaned up here.
+     */
+    free( thread_args_v );
+}
+
+// Stages one field of the current frame in pv->eedi_half[SRCPF], then hands
+// each plane to its eedi2_filter_thread and waits for all three to finish.
+void eedi2_planer( hb_filter_private_t * pv )
+{
+    /* Line 0 when tff is set, line 1 otherwise: where the copied field begins. */
+    const int first_line = !pv->tff;
+    int p;
+
+    /* Fill a half-height source buffer for every plane. */
+    for( p = 0; p < 3; p++ )
+    {
+        const int stride = pv->ref_stride[p];
+        eedi2_fill_half_height_buffer_plane( &pv->ref[1][p][stride*first_line],
+                                             pv->eedi_half[SRCPF][p], stride, pv->height[p] );
+    }
+
+    /*
+     * Kick off every worker. Taking the complete lock first guarantees it is
+     * held when we return for it below; releasing begin wakes the thread.
+     */
+    for( p = 0; p < 3; p++ )
+    {
+        hb_lock( pv->eedi2_complete_lock[p] );
+        hb_unlock( pv->eedi2_begin_lock[p] );
+    }
+
+    /*
+     * Join: re-acquiring a complete lock blocks until that plane's worker
+     * has released it, after which we let go of it again straight away.
+     */
+    p = 0;
+    while( p < 3 )
+    {
+        hb_lock( pv->eedi2_complete_lock[p] );
+        hb_unlock( pv->eedi2_complete_lock[p] );
+        p++;
+    }
+}
+
 
 /*
  * comb detect this segment of all three planes in a single thread.
@@ -626,10 +851,15 @@ static void yadif_filter_line( uint8_t *dst,
        to the other field in the current frame--the one not being filtered.  */
     uint8_t *prev2 = parity ? prev : cur ;
     uint8_t *next2 = parity ? cur  : next;
+    
     int w = pv->width[plane];
     int refs = pv->ref_stride[plane];
     int x;
+    int eedi2_mode = (pv->mode == 5);
     
+    /* We can replace spatial_pred with this interpolation*/
+    uint8_t * eedi2_guess = &pv->eedi_full[DST2PF][plane][y*refs];
+
     /* Decomb's cubic interpolation can only function when there are
        three samples above and below, so regress to yadif's traditional
        two-tap interpolation when filtering at the top and bottom edges. */
@@ -654,60 +884,69 @@ static void yadif_filter_line( uint8_t *dst,
         int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1;
         /* For the actual difference, use the largest of the previous average diffs. */
         int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
-        
-        /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
-        int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
-                                     ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
+
         int spatial_pred;
-         
-        /* Spatial pred is either a bilinear or cubic vertical interpolation. */
-        if( pv->mode > 0 && !edge)
+        
+        if( eedi2_mode )
         {
-            spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
+            /* Who needs yadif's spatial predictions when we can have EEDI2's? */
+            spatial_pred = eedi2_guess[0];
+            eedi2_guess++;
         }
-        else
+        else // Yadif spatial interpolation
         {
-            spatial_pred = (c+e)>>1;
+            /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
+            int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
+                                         ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
+            
+            /* Spatial pred is either a bilinear or cubic vertical interpolation. */
+            if( pv->mode > 0 && !edge)
+            {
+                spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
+            }
+            else
+            {
+                spatial_pred = (c+e)>>1;
+            }
+
+        /* EDDI: Edge Directed Deinterlacing Interpolation
+           Checks 4 different slopes to see if there is more similarity along a diagonal
+           than there was vertically. If a diagonal is more similar, then it indicates
+           an edge, so interpolate along that instead of a vertical line, using either
+           linear or cubic interpolation depending on mode. */
+        #define YADIF_CHECK(j)\
+                {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
+                              + ABS(cur[-refs  +j] - cur[+refs  -j])\
+                              + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
+                    if( score < spatial_score ){\
+                        spatial_score = score;\
+                        if( pv->mode > 0 && !edge )\
+                        {\
+                            switch(j)\
+                            {\
+                                case -1:\
+                                    spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
+                                break;\
+                                case -2:\
+                                    spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
+                                break;\
+                                case 1:\
+                                    spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
+                                break;\
+                                case 2:\
+                                    spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
+                                break;\
+                            }\
+                        }\
+                        else\
+                        {\
+                            spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
+                        }\
+
+            YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
+            YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
         }
 
-/* EDDI: Edge Directed Deinterlacing Interpolation
-   Uses the Martinez-Lim Line Shift Parametric Modeling algorithm...I think.
-   Checks 4 different slopes to see if there is more similarity along a diagonal
-   than there was vertically. If a diagonal is more similar, then it indicates
-   an edge, so interpolate along that instead of a vertical line, using either
-   linear or cubic interpolation depending on mode. */
-#define YADIF_CHECK(j)\
-        {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
-                      + ABS(cur[-refs  +j] - cur[+refs  -j])\
-                      + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
-            if( score < spatial_score ){\
-                spatial_score = score;\
-                if( pv->mode > 0 && !edge )\
-                {\
-                    switch(j)\
-                    {\
-                        case -1:\
-                            spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
-                        break;\
-                        case -2:\
-                            spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
-                        break;\
-                        case 1:\
-                            spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
-                        break;\
-                        case 2:\
-                            spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
-                        break;\
-                    }\
-                }\
-                else\
-                {\
-                    spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
-                }\
-                
-                YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
-                YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
-                                
         /* Temporally adjust the spatial prediction by
            comparing against lines in the adjacent fields. */
         int b = (prev2[-2*refs] + next2[-2*refs])>>1;
@@ -738,11 +977,6 @@ static void yadif_filter_line( uint8_t *dst,
     }
 }
 
-typedef struct yadif_thread_arg_s {
-    hb_filter_private_t *pv;
-    int segment;
-} yadif_thread_arg_t;
-
 /*
  * deinterlace this segment of all three planes in a single thread.
  */
@@ -902,9 +1136,9 @@ static void yadif_filter( uint8_t ** dst,
                           int tff,
                           hb_filter_private_t * pv )
 {
-    
-    int is_combed = comb_segmenter( pv );
-    
+    /* If we're running comb detection, do it now, otherwise blend if mode 4 and interpolate if not. */
+    int is_combed = pv->spatial_metric >= 0 ? comb_segmenter( pv ) : pv->mode == 4 ? 2 : 1;
+
     if( is_combed == 1 )
     {
         pv->yadif_deinterlaced_frames++;
@@ -918,6 +1152,12 @@ static void yadif_filter( uint8_t ** dst,
         pv->unfiltered_frames++;
     }
     
+    if( is_combed == 1 && pv->mode == 5 )
+    {
+        /* Generate an EEDI2 interpolation */
+        eedi2_planer( pv );
+    }
+    
     if( is_combed )
     {
         int segment;
@@ -1131,6 +1371,15 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
     pv->block_width = 16;
     pv->block_height = 16;
     
+    pv->magnitude_threshold = 10;
+    pv->variance_threshold = 20;
+    pv->laplacian_threshold = 20;
+    pv->dilation_threshold = 4;
+    pv->erosion_threshold = 2;
+    pv->noise_threshold = 50;
+    pv->maximum_search_distance = 24;
+    pv->post_processing = 1;
+
     pv->parity   = PARITY_DEFAULT;
 
     pv->mcdeint_mode   = MCDEINT_MODE_DEFAULT;
@@ -1138,14 +1387,22 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
 
     if( settings )
     {
-        sscanf( settings, "%d:%d:%d:%d:%d:%d:%d",
+        sscanf( settings, "%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
                 &pv->mode,
                 &pv->spatial_metric,
                 &pv->motion_threshold,
                 &pv->spatial_threshold,
                 &pv->block_threshold,
                 &pv->block_width,
-                &pv->block_height );
+                &pv->block_height,
+                &pv->magnitude_threshold,
+                &pv->variance_threshold,
+                &pv->laplacian_threshold,
+                &pv->dilation_threshold,
+                &pv->erosion_threshold,
+                &pv->noise_threshold,
+                &pv->maximum_search_distance,
+                &pv->post_processing );
     }
     
     pv->cpu_count = hb_get_cpu_count();
@@ -1181,7 +1438,38 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
 
         pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
     }
+    
+    if( pv->mode == 5 )
+    {
+        /* Allocate half-height eedi2 buffers */
+        height = pv->height[0] / 2;
+        for( i = 0; i < 3; i++ )
+        {
+            int is_chroma = !!i;
+            int w = ((width   + 31) & (~31))>>is_chroma;
+            int h = ((height+6+ 31) & (~31))>>is_chroma;
 
+            for( j = 0; j < 4; j++ )
+            {
+                pv->eedi_half[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
+            }
+        }
+
+        /* Allocate full-height eedi2 buffers */
+        height = pv->height[0];
+        for( i = 0; i < 3; i++ )
+        {
+            int is_chroma = !!i;
+            int w = ((width   + 31) & (~31))>>is_chroma;
+            int h = ((height+6+ 31) & (~31))>>is_chroma;
+
+            for( j = 0; j < 5; j++ )
+            {
+                pv->eedi_full[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
+            }
+        }
+    }
+    
      /*
       * Create yadif threads and locks.
       */
@@ -1264,7 +1552,62 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt,
             hb_error( "decomb could not create threads" );
         }
     }
+    
+    if( pv->mode == 5 )
+    {
+        /*
+         * Create eedi2 threads and locks.
+         */
+        pv->eedi2_threads = malloc( sizeof( hb_thread_t* ) * 3 );
+        pv->eedi2_begin_lock = malloc( sizeof( hb_lock_t * ) * 3 );
+        pv->eedi2_complete_lock = malloc( sizeof( hb_lock_t * ) * 3 );
+        pv->eedi2_arguments = malloc( sizeof( eedi2_arguments_t ) * 3 );
+
+        if( pv->post_processing > 1 )
+        {
+            pv->cx2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            pv->cy2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            pv->cxy = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            pv->tmpc = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
+            if( !pv->cx2 || !pv->cy2 || !pv->cxy || !pv->tmpc )
+                hb_log("EEDI2: failed to malloc derivative arrays");
+            else
+                hb_log("EEDI2: successfully mallloced derivative arrays");
+        }
+
+        for( i = 0; i < 3; i++ )
+        {
+            eedi2_thread_arg_t *eedi2_thread_args;
+
+            eedi2_thread_args = malloc( sizeof( eedi2_thread_arg_t ) );
+
+            if( eedi2_thread_args )
+            {
+                eedi2_thread_args->pv = pv;
+                eedi2_thread_args->plane = i;
+
+                pv->eedi2_begin_lock[i] = hb_lock_init();
+                pv->eedi2_complete_lock[i] = hb_lock_init();
+
+                /*
+                 * Important to start off with the threads locked waiting
+                 * on input.
+                 */
+                hb_lock( pv->eedi2_begin_lock[i] );
 
+                pv->eedi2_arguments[i].stop = 0;
+
+                pv->eedi2_threads[i] = hb_thread_init( "eedi2_filter_segment",
+                                                       eedi2_filter_thread,
+                                                       eedi2_thread_args,
+                                                       HB_NORMAL_PRIORITY );
+            }
+            else
+            {
+                hb_error( "eedi2 could not create threads" );
+            }
+        }
+    }
     
     
     /* Allocate mcdeint specific buffers */
@@ -1327,7 +1670,7 @@ void hb_decomb_close( hb_filter_private_t * pv )
         return;
     }
     
-    hb_log("decomb: yadif deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
+    hb_log("decomb: %s deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->mode == 5 ? "yadif+eedi2" : "yadif", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
 
     /* Cleanup frame buffers */
     if( pv->buf_out[0] )
@@ -1366,6 +1709,46 @@ void hb_decomb_close( hb_filter_private_t * pv )
         }
     }
     
+    if( pv->mode == 5 )
+    {
+        /* Cleanup eedi-half  buffers */
+        int j;
+        for( i = 0; i<3; i++ )
+        {
+            for( j = 0; j < 4; j++ )
+            {
+                uint8_t **p = &pv->eedi_half[j][i];
+                if (*p)
+                {
+                    free( *p - 3*pv->ref_stride[i] );
+                    *p = NULL;
+                }            
+            }
+        }
+
+        /* Cleanup eedi-full  buffers */
+        for( i = 0; i<3; i++ )
+        {
+            for( j = 0; j < 5; j++ )
+            {
+                uint8_t **p = &pv->eedi_full[j][i];
+                if (*p)
+                {
+                    free( *p - 3*pv->ref_stride[i] );
+                    *p = NULL;
+                }            
+            }
+        }
+    }
+    
+    if( pv->post_processing > 1  && pv->mode == 5 )
+    {
+        if (pv->cx2) eedi2_aligned_free(pv->cx2);
+        if (pv->cy2) eedi2_aligned_free(pv->cy2);
+        if (pv->cxy) eedi2_aligned_free(pv->cxy);
+        if (pv->tmpc) eedi2_aligned_free(pv->tmpc);
+    }
+    
     for( i = 0; i < pv->cpu_count; i++)
     {
         /*
@@ -1408,6 +1791,30 @@ void hb_decomb_close( hb_filter_private_t * pv )
     free( pv->decomb_complete_lock );
     free( pv->decomb_arguments );
     
+    if( pv->mode == 5 )
+    {
+        for( i = 0; i < 3; i++)
+        {
+            /*
+             * Tell each eedi2 thread to stop, and then cleanup.
+             */
+            pv->eedi2_arguments[i].stop = 1;
+            hb_unlock(  pv->eedi2_begin_lock[i] );
+
+            hb_thread_close( &pv->eedi2_threads[i] );
+            hb_lock_close( &pv->eedi2_begin_lock[i] );
+            hb_lock_close( &pv->eedi2_complete_lock[i] );
+        }
+
+        /*
+         * free memory for eedi2 structs
+         */
+        free( pv->eedi2_threads );
+        free( pv->eedi2_begin_lock );
+        free( pv->eedi2_complete_lock );
+        free( pv->eedi2_arguments );
+    }
+    
     /* Cleanup mcdeint specific buffers */
     if( pv->mcdeint_mode >= 0 )
     {
@@ -1456,6 +1863,8 @@ int hb_decomb_work( const hb_buffer_t * cbuf_in,
         tff = (pv->parity & 1) ^ 1;
     }
 
+    pv->tff = tff;
+    
     /* Store current frame in yadif cache */
     store_ref( (const uint8_t**)pv->pic_in.data, pv );
 
diff --git a/libhb/eedi2.c b/libhb/eedi2.c
new file mode 100644
index 000000000..2aa906ef0
--- /dev/null
+++ b/libhb/eedi2.c
@@ -0,0 +1,1870 @@
+/* $Id: eedi2.c,v 1.0 2009/03/06 5:00:00 jbrjake Exp $
+
+   This file is part of the HandBrake source code.
+   Homepage: <http://handbrake.fr/>.
+   It may be used under the terms of the GNU General Public License.
+   
+   The EEDI2 interpolator was created by tritical:
+   http://web.missouri.edu/~kes25c/
+*/
+
+#include "hb.h"
+#include "eedi2.h"
+
+/**
+ * EEDI2 directional limit lookup table
+ *
+ * Indexed by the magnitude of an edge direction (0..32). Each entry is the tolerance used when directions are compared against their median or against each other; where a -1 entry is used directly as that tolerance, no abs() difference can satisfy it.
+ */
+const int eedi2_limlut[33] __attribute__ ((aligned (16))) = { 
+                         6, 6, 7, 7, 8, 8, 9, 9, 9, 10,            /* indices  0 ..  9 */
+                         10, 11, 11, 12, 12, 12, 12, 12, 12, 12,   /* indices 10 .. 19 */
+                         12, 12, 12, 12, 12, 12, 12, 12, 12, 12,   /* indices 20 .. 29 */
+                         12, -1, -1 };                             /* indices 30 .. 32 */
+
+/**
+ * Analog of _aligned_malloc; release the result with eedi2_aligned_free, never with free
+ * @param size Number of usable bytes wanted
+ * @param align_size Boundary the returned address is aligned to (must be power of 2)
+ * @return The aligned pointer, or NULL when the underlying malloc fails
+ */
+void *eedi2_aligned_malloc( size_t size, size_t align_size )
+{
+  const int mask = align_size - 1;
+  /* Over-allocate: room for the payload, the alignment slack and an int back-offset */
+  char * const base = (char *)malloc( size + align_size + sizeof( int ) );
+  char * past_header, * user;
+
+  if( base == NULL )
+    return( NULL );
+
+  /* Always advance by 1..align_size bytes so the back-offset fits just below the result */
+  past_header = base + sizeof( int );
+  user = past_header + ( align_size - ( (size_t)past_header & mask ) );
+  *( (int *)( user - sizeof( int ) ) ) = (int)( user - base );
+  return( user );
+}
+
+/**
+ * Analog of _aligned_free. Passing NULL is a no-op, as it is with free().
+ * @param ptr The aligned pointer, created with eedi2_aligned_malloc, to be freed
+ */
+void eedi2_aligned_free( void *ptr )
+{
+  if( ptr == NULL ) return;
+  /* The int stored just below the aligned address is the distance back to the malloc'd base */
+  free( (char *)ptr - *( (int *)ptr - 1 ) );
+}
+
+/**
+ * Sorts metrics for median filtering: an in-place insertion sort into ascending order
+ * @param order Pointer to the table of values to sort
+ * @param length Number of entries in the order array
+ */
+void eedi2_sort_metrics( int *order, const int length )
+{
+    int pos;
+    for( pos = 1; pos < length; pos++ )
+    {
+        const int key = order[pos];
+        int slot;
+        /* Slide the larger entries of the sorted prefix right to open a gap for key */
+        for( slot = pos; slot > 0 && order[slot-1] > key; slot-- )
+        {
+            order[slot] = order[slot-1];
+        }
+        order[slot] = key;
+    }
+}
+
+/**
+ * Bitblits an image plane (overwrites one bitmap with another)
+ * @param dstp Pointer to destination bitmap
+ * @param dst_pitch Stride of destination bitmap
+ * @param srcp Pointer to source bitmap
+ * @param src_pitch Stride of source bitmap
+ * @param row_size Width of the bitmap being copied
+ * @param height Height of the source bitmap
+ *
+ * When row_size, dst_pitch, and src_pitch are equal (or there is only one row), eedi2_bit_blit copies the whole plane with a single memcpy instead of line by line.
+ */
+void eedi2_bit_blit( uint8_t * dstp, int dst_pitch, 
+                     const uint8_t * srcp, int src_pitch,
+                     int row_size, int height )
+{
+    int rows_left;
+    if( height == 0 || row_size == 0 )
+        return;
+
+    /* Rows are back to back in both bitmaps, so one copy covers them all */
+    if( height == 1 || ( row_size == src_pitch && src_pitch == dst_pitch ) )
+    {
+        memcpy( dstp, srcp, row_size * height );
+        return;
+    }
+
+    for( rows_left = height; rows_left > 0; rows_left-- )
+    {
+        memcpy( dstp, srcp, row_size );
+        dstp += dst_pitch;
+        srcp += src_pitch;
+    }
+}
+
+/**
+ * A specialized variant of bit_blit, just for setting up the initial, field-sized bitmap planes that EEDI2 interpolates from.
+ * @param src Pointer to the first line to copy from the source bitmap plane
+ * @param dst Pointer to the destination bitmap plane being copied to
+ * @param pitch Stride of both bitmaps; a whole stride is copied for each line
+ * @param height Height of the original, full-size src plane being copied from
+ */
+void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height )
+{
+    /* Every second source line is taken, beginning with the one src points at: for TFF that should be line 0,
+       the top field, for BFF line 1. NOTE(review): presumably the caller picks the field by offsetting src - confirm. */
+    int lines_left = height;
+    while( lines_left > 0 )
+    {
+        memcpy( dst, src, pitch );
+        src += 2 * pitch;
+        dst += pitch;
+        lines_left -= 2;
+    }
+}
+
+/**
+ * A specialized variant of bit_blit, just for resizing the field-height maps EEDI2 generates to frame-height...a simple line doubler
+ * @param srcp Pointer to source bitmap plane being copied from
+ * @param dstp Pointer to the destination bitmap plane being copied to; it receives 2 * height lines
+ * @param height Height of the input, half-size src plane being copied from
+ * @param pitch Stride of both bitmaps
+ */
+void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch )
+{
+    int line;
+    for( line = 0; line < height; line++ )
+    {
+        const uint8_t * from = srcp + line * pitch;
+        uint8_t * to = dstp + 2 * line * pitch;
+        /* Each source line lands on two consecutive output lines */
+        memcpy( to, from, pitch );
+        memcpy( to + pitch, from, pitch );
+    }
+}
+
+/**
+ * Finds places where vertically adjacent pixels abruptly change in intensity, i.e., sharp edges. Edge pixels are written to dstp as 255, everything else as 0.
+ * @param dstp Pointer to the destination bitmap
+ * @param dst_pitch Stride of dstp
+ * @param srcp Pointer to the source bitmap
+ * @param src_pitch Stride of srcp
+ * @param mthresh Magnitude threshold, ensures it doesn't mark edges on pixels that are too similar (10 is a good default value)
+ * @param lthresh Laplacian threshold, ensures edges are still prominent in the 2nd spatial derivative of the srcp plane (20 is a good default value)
+ * @param vthresh Variance threshold, 3x3 neighbourhoods whose variance falls below it are never marked as edges (20 is a good default value)
+ * @param height Height of half-height single-field frame; dstp must hold this many rows
+ * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
+ */
+void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch,
+                            int mthresh, int lthresh, int vthresh, int height, int width )
+{
+    int x, y;
+    /* NOTE(review): Ix * Ix + Iy * Iy below is a squared magnitude, so mthresh * mthresh looks intended; * 10 only equals it for mthresh == 10 - confirm */
+    mthresh = mthresh * 10;
+    vthresh = vthresh * 81;  /* compared against 9 * sumsq - sum * sum, which is 81 times the 3x3 variance */
+    /* Clear all rows: the loop below only ever writes 255, so every pixel it skips must already read as non-edge */
+    memset( dstp, 0, height * dst_pitch );
+    
+    srcp += src_pitch;
+    dstp += dst_pitch;
+    unsigned char *srcpp = srcp-src_pitch;
+    unsigned char *srcpn = srcp+src_pitch;
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width-1; ++x )
+        {
+            if( ( abs( srcpp[x]  -   srcp[x] ) < 10 &&
+                  abs(  srcp[x]  -  srcpn[x] ) < 10 &&
+                  abs( srcpp[x]  -  srcpn[x] ) < 10 )
+              ||
+                ( abs( srcpp[x-1] -  srcp[x-1] ) < 10 &&
+                  abs(  srcp[x-1] - srcpn[x-1] ) < 10 &&
+                  abs( srcpp[x-1] - srcpn[x-1] ) < 10 &&
+                  abs( srcpp[x+1] -  srcp[x+1] ) < 10 &&
+                  abs(  srcp[x+1] - srcpn[x+1] ) < 10 &&
+                  abs( srcpp[x+1] - srcpn[x+1] ) < 10) )
+                continue;  /* vertically flat at column x, or at both columns x-1 and x+1 */
+            /* Sum and sum of squares over the 3x3 neighbourhood, for the variance test */
+            const int sum = srcpp[x-1] + srcpp[x] + srcpp[x+1] +
+                             srcp[x-1] +  srcp[x]+   srcp[x+1] +
+                            srcpn[x-1] + srcpn[x] + srcpn[x+1];
+            
+            const int sumsq = srcpp[x-1] * srcpp[x-1] +
+                              srcpp[x]   * srcpp[x]   +
+                              srcpp[x+1] * srcpp[x+1] +
+                               srcp[x-1] *  srcp[x-1] +
+                               srcp[x]   *  srcp[x]   +
+                               srcp[x+1] *  srcp[x+1] +
+                              srcpn[x-1] * srcpn[x-1] +
+                              srcpn[x]   * srcpn[x]   +
+                              srcpn[x+1] * srcpn[x+1];
+
+            if( 9 * sumsq-sum * sum < vthresh )
+                continue;
+            /* First derivatives: horizontal central difference, largest vertical difference */
+            const int Ix = srcp[x+1] - srcp[x-1];
+            const int Iy = MAX( MAX( abs( srcpp[x] - srcpn[x] ),
+                                     abs( srcpp[x] -  srcp[x] ) ),
+                                abs( srcp[x] - srcpn[x] ) );
+            if( Ix * Ix + Iy * Iy >= mthresh )
+            {
+                dstp[x] = 255;
+                continue;
+            }
+            /* Second derivatives: a weak gradient can still be an edge if the curvature is strong */
+            const int Ixx =  srcp[x-1] - 2 * srcp[x] +  srcp[x+1];
+            const int Iyy = srcpp[x]   - 2 * srcp[x] + srcpn[x];
+            if( abs( Ixx ) + abs( Iyy ) >= lthresh )
+                dstp[x] = 255;
+        }
+        dstp += dst_pitch;
+        srcpp += src_pitch;
+        srcp += src_pitch;
+        srcpn += src_pitch;
+    }
+}
+
+/**
+ * Expands and smooths out the edge mask
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the destination to store the dilated edge mask
+ * @param dst_pitch Stride of dstp
+ * @param dstr Dilation threshold, a zero pixel of mskp becomes an edge in dstp when at least this many of its 8 neighbours are edges (0xFF) in mskp (4 is a good default value)
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                             int dstr, int height, int width )
+{
+    int row, col;
+
+    /* Start from a straight copy of the mask; the loop below only ever adds edge pixels to it */
+    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
+
+    /* The outermost rows and columns have no full 3x3 neighbourhood and stay as copied */
+    for( row = 1; row < height - 1; row++ )
+    {
+        const uint8_t * above = mskp + ( row - 1 ) * msk_pitch;
+        const uint8_t * here  = above + msk_pitch;
+        const uint8_t * below = here + msk_pitch;
+        uint8_t * out = dstp + row * dst_pitch;
+
+        for( col = 1; col < width - 1; col++ )
+        {
+            int edges;
+
+            /* Anything already non-zero keeps the value copied above */
+            if( here[col] != 0 )
+                continue;
+
+            /* Each comparison yields 0 or 1, so the sum is the number of 0xFF neighbours */
+            edges = ( above[col-1] == 0xFF ) + ( above[col] == 0xFF ) + ( above[col+1] == 0xFF ) +
+                    (  here[col-1] == 0xFF ) +                          (  here[col+1] == 0xFF ) +
+                    ( below[col-1] == 0xFF ) + ( below[col] == 0xFF ) + ( below[col+1] == 0xFF );
+
+            /* Enough edge neighbours: grow the edge into this pixel */
+            if( edges >= dstr )
+            {
+                out[col] = 0xFF;
+            }
+        }
+    }
+}
+
+/**
+ * Contracts the edge mask
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the destination to store the eroded edge mask
+ * @param dst_pitch Stride of dstp
+ * @param estr Erosion threshold, an edge pixel (0xFF) of mskp is cleared in dstp when fewer than this many of its 8 neighbours are also edges in mskp (2 is a good default value)
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                            int estr, int height, int width )
+{
+    int row, col;
+
+    /* Start from a straight copy of the mask; the loop below only ever removes edge pixels from it */
+    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
+
+    for( row = 1; row < height - 1; row++ )
+    {
+        const uint8_t * above = mskp + ( row - 1 ) * msk_pitch;
+        const uint8_t * here  = above + msk_pitch;
+        const uint8_t * below = here + msk_pitch;
+        uint8_t * out = dstp + row * dst_pitch;
+
+        for( col = 1; col < width - 1; col++ )
+        {
+            int edges;
+
+            /* Only full edge pixels are candidates for removal */
+            if( here[col] != 0xFF )
+                continue;
+
+            edges = ( above[col-1] == 0xFF ) + ( above[col] == 0xFF ) + ( above[col+1] == 0xFF ) +
+                    (  here[col-1] == 0xFF ) +                          (  here[col+1] == 0xFF ) +
+                    ( below[col-1] == 0xFF ) + ( below[col] == 0xFF ) + ( below[col+1] == 0xFF );
+
+            /* Too few edge neighbours: drop this pixel from the mask */
+            if( edges < estr )
+            {
+                out[col] = 0;
+            }
+        }
+    }
+}
+
+/**
+ * Smooths out horizontally aligned holes in the mask
+ *
+ * An edge pixel with no other edge among the 3 pixels to its left and the 3 to its right is cleared.
+ * A non-edge pixel is marked as an edge when its row has edges on both sides of it that are no more than 4 pixels apart.
+ *
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the destination to store the smoothed edge mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, 
+                              int height, int width )
+{
+    int row, col;
+
+    /* Pixels the loop below does not touch keep the value copied here */
+    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
+
+    for( row = 1; row < height - 1; row++ )
+    {
+        const uint8_t * m = mskp + row * msk_pitch;
+        uint8_t * out = dstp + row * dst_pitch;
+        /* Columns closer than 3 pixels to either side are skipped so m[col-3]..m[col+3] stay in the row */
+        for( col = 3; col < width - 3; col++ )
+        {
+            const int l1 = m[col-1], l2 = m[col-2], l3 = m[col-3];
+            const int r1 = m[col+1], r2 = m[col+2], r3 = m[col+3];
+
+            if( m[col] )
+            {
+                /* Isolated speck: nothing else is set within 3 pixels either way */
+                if( !( l1 || l2 || l3 || r1 || r2 || r3 ) )
+                    out[col] = 0;
+            }
+            else if( ( l1 && ( r1 || r2 || r3 ) ) ||
+                     ( l2 && ( r1 || r2 ) ) ||
+                     ( l3 && r1 ) )
+            {
+                /* Hole bridged by edges on both sides whose offsets add up to at most 4 */
+                out[col] = 0xFF;
+            }
+        }
+    }
+}
+
+/**
+ * Calculates spatial direction vectors for the edges. This is EEDI2's timesink, and can be thought of as YADIF_CHECK on steroids, as both try to discern which angle a given edge follows
+ * @param plane The plane of the image being processed, to know to reduce maxd for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param srcp Pointer to the source image being filtered
+ * @param src_pitch Stride of srcp
+ * @param dstp Pointer to the destination to store the edge direction map: 255 = no direction, otherwise 128 + 4 * direction
+ * @param dst_pitch Stride of dstp
+ * @param maxd Maximum pixel distance to search (24 is a good default value); halved when plane != 0. NOTE(review): results index the 33-entry eedi2_limlut, so values above 32 look unsafe - confirm callers clamp
+ * @param nt Noise threshold (50 is a good default value)
+ * @param height Height of half-height field-sized frame
+ * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch. NOTE(review): u is clamped so x-1+u..x+1+u stay in [0, width), but the mirrored reads at x-1-u..x+1-u are not clamped and can leave the row - confirm the buffers tolerate this
+ */
+void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
+                            uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width  )
+{
+    int x, y, u, i;
+    /* Every pixel starts out as 255, i.e. no direction found */
+    memset( dstp, 255, dst_pitch * height );
+    mskp += msk_pitch;
+    dstp += dst_pitch;
+    srcp += src_pitch;
+    unsigned char *src2p = srcp - src_pitch * 2;
+    unsigned char *srcpp = srcp - src_pitch;
+    unsigned char *srcpn = srcp + src_pitch;
+    unsigned char *src2n = srcp + src_pitch * 2;
+    unsigned char *mskpp = mskp - msk_pitch;
+    unsigned char *mskpn = mskp + msk_pitch;
+    const int maxdt = plane == 0 ? maxd : ( maxd >> 1 );  /* planes other than 0 search half as far */
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            if( mskp[x] != 0xFF || ( mskp[x-1] != 0xFF && mskp[x+1] != 0xFF ) )
+                continue;  /* only edge pixels with an edge pixel directly left or right are analysed */
+            const int startu = MAX( -x + 1, -maxdt );
+            const int stopu = MIN( width - 2 - x, maxdt );  /* keeps x-1+u and x+1+u inside the row */
+            int minb = MIN( 13 * nt,
+                            ( abs( srcp[x] - srcpn[x] ) +
+                              abs( srcp[x] - srcpp[x] ) ) * 6 );
+            int mina = MIN( 19 * nt,
+                            ( abs( srcp[x] - srcpn[x] ) +
+                              abs( srcp[x] - srcpp[x] ) ) * 9 );
+            int minc = mina;
+            int mind = minb;
+            int mine = minb;
+            int dira = -5000, dirb = -5000, dirc = -5000, dird = -5000, dire = -5000;  /* -5000 == no candidate found */
+            for( u = startu; u <= stopu; ++u )
+            {
+                if( y == 1 ||
+                      mskpp[x-1+u] == 0xFF || mskpp[x+u] == 0xFF || mskpp[x+1+u] == 0xFF )  /* row above needs an edge near x+u (waived when y == 1) */
+                {
+                    if( y == height - 2 ||
+                        mskpn[x-1-u] == 0xFF || mskpn[x-u] == 0xFF || mskpn[x+1-u] == 0xFF )  /* row below needs an edge near x-u (waived when y == height - 2) */
+                    {
+                        const int diffsn = abs(  srcp[x-1] - srcpn[x-1-u] ) +
+                                           abs(  srcp[x]   - srcpn[x-u] )   +
+                                           abs(  srcp[x+1] - srcpn[x+1-u] );
+
+                        const int diffsp = abs(  srcp[x-1] - srcpp[x-1+u] ) +
+                                           abs(  srcp[x]   - srcpp[x+u] )   +
+                                           abs(  srcp[x+1] - srcpp[x+1+u] );
+
+                        const int diffps = abs( srcpp[x-1] -  srcp[x-1-u] ) +
+                                           abs( srcpp[x]   -  srcp[x-u] )   +
+                                           abs( srcpp[x+1] -  srcp[x+1-u] );
+
+                        const int diffns = abs( srcpn[x-1] -  srcp[x-1+u] ) +
+                                           abs( srcpn[x]   -  srcp[x+u] )   +
+                                           abs( srcpn[x+1] -  srcp[x+1+u] );
+
+                        const int diff = diffsn + diffsp + diffps + diffns;
+                        int diffd = diffsp + diffns;
+                        int diffe = diffsn + diffps;
+                        if( diff < minb )
+                        {
+                            dirb = u;
+                            minb = diff;
+                        }
+                        if( __builtin_expect( y > 1, 1) )  /* src2p (two rows up) is only used when that row exists */
+                        {
+                            const int diff2pp = abs( src2p[x-1] - srcpp[x-1-u] ) +
+                                            abs( src2p[x]   - srcpp[x-u] )   +
+                                            abs( src2p[x+1] - srcpp[x+1-u] );
+                            const int diffp2p = abs( srcpp[x-1] - src2p[x-1+u] ) + 
+                                            abs( srcpp[x]   - src2p[x+u] )   + 
+                                            abs( srcpp[x+1] - src2p[x+1+u] );
+                            const int diffa = diff + diff2pp + diffp2p;
+                            diffd += diffp2p;
+                            diffe += diff2pp;
+                            if( diffa < mina )
+                            {
+                                dira = u;
+                                mina = diffa;
+                            }
+                        }
+                        if( __builtin_expect( y < height-2, 1) )  /* src2n (two rows down) is only used when that row exists */
+                        {
+                            const int diff2nn = abs( src2n[x-1] - srcpn[x-1+u] ) +
+                                                abs( src2n[x]   - srcpn[x+u] )   +
+                                                abs( src2n[x+1] - srcpn[x+1+u] );
+                            const int diffn2n = abs( srcpn[x-1] - src2n[x-1-u] ) +
+                                                abs( srcpn[x]   - src2n[x-u] )   +
+                                                abs( srcpn[x+1] - src2n[x+1-u] );
+                            const int diffc = diff + diff2nn + diffn2n;
+                            diffd += diff2nn;
+                            diffe += diffn2n;
+                            if( diffc < minc )
+                            {
+                                dirc = u;
+                                minc = diffc;
+                            }
+                        }
+                        if( diffd < mind )
+                        {
+                            dird = u;
+                            mind = diffd;
+                        }
+                        if( diffe < mine )
+                        {
+                            dire = u;
+                            mine = diffe;
+                        }
+                    }
+                }
+            }
+            int order[5], k=0;  /* gather whichever of the five candidate directions were found */
+            if( dira != -5000 ) order[k++] = dira;
+            if( dirb != -5000 ) order[k++] = dirb;
+            if( dirc != -5000 ) order[k++] = dirc;
+            if( dird != -5000 ) order[k++] = dird;
+            if( dire != -5000 ) order[k++] = dire;
+            if( k > 1 )
+            {
+                eedi2_sort_metrics( order, k );
+                const int mid = ( k & 1 ) ? 
+                                    order[k>>1] :
+                                    ( order[(k-1)>>1] + order[k>>1] + 1 ) >> 1;
+                const int tlim = MAX( eedi2_limlut[abs(mid)] >> 2, 2 );  /* tolerance around the median, never below 2 */
+                int sum = 0, count = 0;
+                for( i = 0; i < k; ++i )
+                {
+                    if( abs( order[i] - mid ) <= tlim )
+                    {
+                        ++count;
+                        sum += order[i];
+                    }
+                }
+                if( count > 1 ) 
+                    dstp[x] = 128 + ( (int)( (float)sum / (float)count ) * 4 );  /* mean of the agreeing candidates */
+                else
+                    dstp[x] = 128;
+            }
+            else dstp[x] = 128;  /* 128 encodes direction 0 */
+        }
+        mskpp += msk_pitch;
+        mskp += msk_pitch;
+        mskpn += msk_pitch;
+        src2p += src_pitch;
+        srcpp += src_pitch;
+        srcp += src_pitch;
+        srcpn += src_pitch;
+        src2n += src_pitch;
+        dstp += dst_pitch;
+    }
+}
+
+/**
+ * Filters the edge direction map: dstp starts as a copy of dmskp, and an edge pixel's direction is reset to 255 when the direction values found along its direction in the row above and, mirrored, in the row below both disagree with it
+ * @param mskp Pointer to the source edge mask being read from
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filtered edge direction mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
+ */
+void eedi2_filter_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                       uint8_t * dstp, int dst_pitch, int height, int width )
+{
+    int x, y, j;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+    
+    mskp += msk_pitch;
+    dmskp += dmsk_pitch;
+    dstp += dst_pitch;
+    unsigned char *dmskpp = dmskp - dmsk_pitch;
+    unsigned char *dmskpn = dmskp + dmsk_pitch;
+
+    for( y = 1; y < height - 1; ++y )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            if( dmskp[x] == 0xFF || mskp[x] != 0xFF )
+                continue;  /* only edge pixels that already carry a direction are checked */
+            const int dir = ( dmskp[x] - 128 ) >> 2;  /* signed direction: the map value is offset by 128 and scaled by 4 */
+            const int lim = MAX( abs( dir ) * 2, 12 );
+            int ict = 0, icb = 0;  /* ict: disagreement found using the row above; icb: using the row below */
+            if( dir < 0 )
+            {
+                const int dirt = MAX( -x, dir );  /* do not scan left of column 0 */
+                for( j = dirt; j <= 0; ++j )
+                {
+                    if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) ||
+                        ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) ||
+                        ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                    {
+                        ict = 1;
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                const int dirt = MIN( width - x - 1, dir );  /* do not scan right of column width - 1 */
+                for( j = 0; j <= dirt; ++j )
+                {
+                    if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) ||
+                        ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) ||
+                        ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                    {
+                        ict = 1;
+                        break;
+                    }
+                }
+            }
+            if( ict )  /* the row below is scanned the opposite way, and only after the row above disagreed */
+            {
+                if( dir < 0 )
+                {
+                    const int dirt = MIN( width - x - 1, abs( dir ) );
+                    for( j = 0; j <= dirt; ++j )
+                    {
+                        if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) ||
+                            ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) ||
+                            ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                        {
+                            icb = 1;
+                            break;
+                        }
+                    }
+                }
+                else
+                {
+                    const int dirt = MAX( -x, -dir );
+                    for( j = dirt; j <= 0; ++j )
+                    {
+                        if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) ||
+                            ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) ||
+                            ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
+                        {
+                            icb = 1;
+                            break;
+                        }
+                    }
+                }
+                if( icb )
+                    dstp[x] = 255;  /* disagreement on both sides: drop this pixel's direction */
+            }
+        }
+        mskp += msk_pitch;
+        dmskpp += dmsk_pitch;
+        dmskp += dmsk_pitch;
+        dmskpn += dmsk_pitch;
+        dstp += dst_pitch;
+    }
+}
+
+
+/**
+ * Filters the edge direction mask: each edge pixel's direction is replaced by a median-guided average of the known directions in its 3x3 neighbourhood, or reset to 255 when too few of them agree
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filtered edge direction mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                           uint8_t * dstp, int dst_pitch, int height, int width )
+{
+    int col, row, r, c, n;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    for( row = 1; row < height - 1; row++ )
+    {
+        const uint8_t * edge = mskp + row * msk_pitch;
+        const uint8_t * dirs = dmskp + row * dmsk_pitch;
+        uint8_t * out = dstp + row * dst_pitch;
+
+        for( col = 1; col < width - 1; col++ )
+        {
+            int known[9], found = 0, kept = 0, total = 0;
+            int median, tolerance;
+
+            if( edge[col] != 0xFF )
+                continue;
+
+            /* Collect every direction in the 3x3 neighbourhood (centre included) that is not 255 */
+            for( r = -1; r <= 1; r++ )
+                for( c = -1; c <= 1; c++ )
+                {
+                    const int d = dirs[r * dmsk_pitch + col + c];
+                    if( d != 0xFF )
+                        known[found++] = d;
+                }
+
+            if( found < 4 )
+            {
+                out[col] = 255;
+                continue;
+            }
+
+            eedi2_sort_metrics( known, found );
+            if( found & 1 )
+                median = known[found >> 1];
+            else
+                median = ( known[( found - 1 ) >> 1] + known[found >> 1] + 1 ) >> 1;
+            tolerance = eedi2_limlut[abs( median - 128 ) >> 2];
+
+            for( n = 0; n < found; n++ )
+            {
+                if( abs( known[n] - median ) <= tolerance )
+                {
+                    kept++;
+                    total += known[n];
+                }
+            }
+
+            if( kept < 4 || ( kept < 5 && dirs[col] == 0xFF ) )
+                out[col] = 255;
+            else
+                out[col] = (int)( ( (float)( total + median ) / (float)( kept + 1 ) ) + 0.5f );
+        }
+    }
+}
+
+/**
+ * Smoothes out the edge direction map: an edge pixel without a direction (255) receives a median-guided average of its 8 neighbours' directions when at least 5 of them agree
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the expanded edge direction mask
+ * @param dst_pitch Stride of dstp
+ * @param height Height of half-height field-sized frame
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                           uint8_t * dstp, int dst_pitch, int height, int width )
+{
+    int col, row, r, c, n;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    for( row = 1; row < height - 1; row++ )
+    {
+        const uint8_t * edge = mskp + row * msk_pitch;
+        const uint8_t * dirs = dmskp + row * dmsk_pitch;
+        uint8_t * out = dstp + row * dst_pitch;
+
+        for( col = 1; col < width - 1; col++ )
+        {
+            int known[9], found = 0, kept = 0, total = 0;
+            int median, tolerance;
+
+            /* Only edge pixels that still lack a direction are candidates */
+            if( dirs[col] != 0xFF || edge[col] != 0xFF )
+                continue;
+
+            /* The centre is 255 at this point, so only the 8 neighbours can contribute */
+            for( r = -1; r <= 1; r++ )
+                for( c = -1; c <= 1; c++ )
+                {
+                    const int d = dirs[r * dmsk_pitch + col + c];
+                    if( d != 0xFF )
+                        known[found++] = d;
+                }
+            if( found < 5 )
+                continue;
+
+            eedi2_sort_metrics( known, found );
+            median = ( found & 1 ) ? known[found >> 1]
+                                   : ( known[( found - 1 ) >> 1] + known[found >> 1] + 1 ) >> 1;
+            tolerance = eedi2_limlut[abs( median - 128 ) >> 2];
+            for( n = 0; n < found; n++ )
+            {
+                if( abs( known[n] - median ) <= tolerance )
+                {
+                    kept++;
+                    total += known[n];
+                }
+            }
+            if( kept >= 5 )
+                out[col] = (int)( ( (float)( total + median ) / (float)( kept + 1 ) ) + 0.5f );
+        }
+    }
+}
+
+/**
+ * Re-draws a clearer, less blocky frame-height edge direction mask: each output row that is written takes its directions from the input rows directly above and below it, every other pixel is 255 (no direction)
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the redrawn direction mask
+ * @param dst_pitch Stride of dstp
+ * @param tff Whether or not the frame parity is Top Field First (1 or 0); selects which rows are written
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                               uint8_t * dstp, int dst_pitch, int tff, int height, int width )
+{
+    int x, y, i;
+    memset( dstp, 255, dst_pitch * height );
+    dstp  += dst_pitch  * ( 2 - tff );  /* first output row written: 1 when tff, 2 otherwise */
+    dmskp += dmsk_pitch * ( 1 - tff );  /* input row directly above that output row */
+    mskp  += msk_pitch  * ( 1 - tff );
+    unsigned char *dmskpn = dmskp + dmsk_pitch * 2;  /* input row directly below the output row */
+    unsigned char *mskpn = mskp + msk_pitch * 2;
+    for( y = 2 - tff; y < height - 1; y += 2 )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue;  /* neither vertical neighbour is an edge */
+            int v = 0, order[6];
+            if(  dmskp[x-1] != 0xFF ) order[v++] = dmskp[x-1];
+            if(  dmskp[x]   != 0xFF ) order[v++] = dmskp[x];
+            if(  dmskp[x+1] != 0xFF ) order[v++] = dmskp[x+1];
+            if( dmskpn[x-1] != 0xFF ) order[v++] = dmskpn[x-1];
+            if( dmskpn[x]   != 0xFF ) order[v++] = dmskpn[x];
+            if( dmskpn[x+1] != 0xFF ) order[v++] = dmskpn[x+1];
+            if( v < 3 ) continue;
+            else
+            {
+                eedi2_sort_metrics( order, v );
+                const int mid = ( v & 1 ) ? order[v>>1] : ( order[(v-1)>>1] + order[v>>1]+1) >> 1;
+                const int lim = eedi2_limlut[abs(mid-128)>>2];
+                int u = 0;  /* number of columns whose upper and lower directions agree, or where one is unknown */
+                if( abs( dmskp[x-1] - dmskpn[x-1] ) <= lim ||
+                    dmskp[x-1] == 0xFF || dmskpn[x-1] == 0xFF )
+                        ++u;
+                if( abs( dmskp[x]   - dmskpn[x] )   <= lim ||
+                    dmskp[x]   == 0xFF || dmskpn[x]   == 0xFF )
+                        ++u;
+                if( abs( dmskp[x+1] - dmskpn[x+1] ) <= lim ||  /* was dmskpn[x-1]: each column is compared with itself */
+                    dmskp[x+1] == 0xFF || dmskpn[x+1] == 0xFF)
+                        ++u;
+                if( u < 2 ) continue;
+                int count = 0, sum = 0;
+                for( i = 0; i < v; ++i )
+                {
+                    if( abs( order[i] - mid ) <= lim )
+                    {
+                        ++count;
+                        sum += order[i];
+                    }
+                }
+                if( count < v - 2 || count < 2 ) continue;
+                dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
+            }
+        }
+        mskp += msk_pitch * 2;
+        mskpn += msk_pitch * 2;
+        dstp += dst_pitch * 2;
+        dmskp += dmsk_pitch * 2;
+        dmskpn += dmsk_pitch * 2;
+    }
+}
+
+/**
+ * Cleans up the frame-height edge direction mask: a candidate pixel becomes the rounded mean of the neighbouring directions lying close to their middle value, or 255 when too few of them do
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filtered direction mask
+ * @param dst_pitch Stride of dstp
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                              uint8_t * dstp, int dst_pitch, int field, int height, int width )
+{
+    int col, row, r, dx, k;
+    /* Begin with a straight copy; the loop below only revisits every second row */
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    unsigned char *dir_mid  = dmskp + dmsk_pitch * ( 2 - field );
+    unsigned char *dir_top  = dir_mid - dmsk_pitch * 2;
+    unsigned char *dir_bot  = dir_mid + dmsk_pitch * 2;
+    unsigned char *edge_top = mskp + msk_pitch * ( 1 - field );
+    unsigned char *edge_bot = edge_top + msk_pitch * 2;
+    unsigned char *out      = dstp + dst_pitch * ( 2 - field );
+
+    for( row = 2 - field; row < height - 1; row += 2 )
+    {
+        /* Near the top and bottom of the frame one of the outer rows is left out */
+        const int use_top = ( row > 1 );
+        const int use_bot = ( row < height - 2 );
+
+        for( col = 1; col < width - 1; ++col )
+        {
+            if( edge_top[col] != 0xFF && edge_bot[col] != 0xFF ) continue;
+
+            /* Collect every known direction from the 3x3 window, top row first */
+            unsigned char *rows[3] = { dir_top, dir_mid, dir_bot };
+            const int usable[3] = { use_top, 1, use_bot };
+            int dirs[9], n = 0;
+            for( r = 0; r < 3; ++r )
+            {
+                if( !usable[r] ) continue;
+                for( dx = -1; dx <= 1; ++dx )
+                {
+                    if( rows[r][col+dx] != 0xFF ) dirs[n++] = rows[r][col+dx];
+                }
+            }
+
+            /* Stays 255 unless enough neighbours fall within lim of the middle value */
+            int result = 255;
+            if( n >= 4 )
+            {
+                eedi2_sort_metrics( dirs, n );
+                const int mid = ( n % 2 ) ? dirs[n/2] : ( dirs[(n-1)/2] + dirs[n/2] + 1 ) >> 1;
+                const int lim = eedi2_limlut[abs( mid - 128 ) >> 2];
+                int total = 0, agree = 0;
+                for( k = 0; k < n; ++k )
+                {
+                    if( abs( dirs[k] - mid ) > lim ) continue;
+                    ++agree;
+                    total += dirs[k];
+                }
+                if( agree >= 5 || ( agree == 4 && dir_mid[col] != 0xFF ) )
+                    result = (int)( ( (float)( total + mid ) / (float)( agree + 1 ) ) + 0.5f );
+            }
+            out[col] = result;
+        }
+        edge_top += msk_pitch * 2;
+        edge_bot += msk_pitch * 2;
+        dir_top  += dmsk_pitch * 2;
+        dir_mid  += dmsk_pitch * 2;
+        dir_bot  += dmsk_pitch * 2;
+        out      += dst_pitch * 2;
+    }
+}
+
+/**
+ * Grows the frame-height edge direction mask into pixels that have no direction yet, using the directions of the eight pixels around them
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the expanded direction mask
+ * @param dst_pitch Stride of dstp
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                              uint8_t * dstp, int dst_pitch, int field, int height, int width )
+{
+    int col, row, k, dx;
+    /* Everything not explicitly filled in below keeps its source value */
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    unsigned char *dir_mid  = dmskp + dmsk_pitch * ( 2 - field );
+    unsigned char *dir_top  = dir_mid - dmsk_pitch * 2;
+    unsigned char *dir_bot  = dir_mid + dmsk_pitch * 2;
+    unsigned char *edge_top = mskp + msk_pitch * ( 1 - field );
+    unsigned char *edge_bot = edge_top + msk_pitch * 2;
+    unsigned char *out      = dstp + dst_pitch * ( 2 - field );
+
+    for( row = 2 - field; row < height - 1; row += 2 )
+    {
+        for( col = 1; col < width - 1; ++col )
+        {
+            /* Candidates: no direction yet, and 0xFF in the edge mask above or below */
+            if( dir_mid[col] != 0xFF ) continue;
+            if( edge_top[col] != 0xFF && edge_bot[col] != 0xFF ) continue;
+            /* Gather the known directions among the eight surrounding pixels */
+            int dirs[9], n = 0;
+            if( row > 1 )
+            {
+                for( dx = -1; dx <= 1; ++dx )
+                    if( dir_top[col+dx] != 0xFF ) dirs[n++] = dir_top[col+dx];
+            }
+            for( dx = -1; dx <= 1; dx += 2 )
+                if( dir_mid[col+dx] != 0xFF ) dirs[n++] = dir_mid[col+dx];
+            if( row < height - 2 )
+            {
+                for( dx = -1; dx <= 1; ++dx )
+                    if( dir_bot[col+dx] != 0xFF ) dirs[n++] = dir_bot[col+dx];
+            }
+            if( n < 5 ) continue;
+            eedi2_sort_metrics( dirs, n );
+            const int mid = ( n % 2 ) ? dirs[n/2] : ( dirs[(n-1)/2] + dirs[n/2] + 1 ) >> 1;
+            const int lim = eedi2_limlut[abs( mid - 128 ) >> 2];
+            int total = 0, agree = 0;
+            for( k = 0; k < n; ++k )
+            {
+                const int near_mid = ( abs( dirs[k] - mid ) <= lim );
+                agree += near_mid;
+                total += near_mid ? dirs[k] : 0;
+            }
+            if( agree >= 5 )
+                out[col] = (int)( ( (float)( total + mid ) / (float)( agree + 1 ) ) + 0.5f );
+        }
+        edge_top += msk_pitch * 2;
+        edge_bot += msk_pitch * 2;
+        dir_top  += dmsk_pitch * 2;
+        dir_mid  += dmsk_pitch * 2;
+        dir_bot  += dmsk_pitch * 2;
+        out      += dst_pitch * 2;
+    }
+}
+
+/**
+ * Like the name suggests, this function fills in gaps in the frame-height edge direction mask: a horizontal run of pixels without a direction is bridged with a linear ramp between the known directions found to its left and right
+ * @param mskp Pointer to the edge mask
+ * @param msk_pitch Stride of mskp
+ * @param dmskp Pointer to the edge direction mask being read from
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the destination to store the filled-in direction mask
+ * @param dst_pitch Stride of dstp
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
+ */
+void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
+                         uint8_t * dstp, int dst_pitch, int field, int height, int width )
+{
+    int x, y, j;
+
+    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
+
+    dmskp += dmsk_pitch * ( 2 - field );
+    unsigned char *dmskpp = dmskp - dmsk_pitch * 2;  /* direction row two lines up; only read when y > 2 */
+    unsigned char *dmskpn = dmskp + dmsk_pitch * 2;  /* direction row two lines down; only read when y < height - 3 */
+    mskp += msk_pitch * ( 1 - field );
+    unsigned char *mskpp = mskp - msk_pitch * 2;
+    unsigned char *mskpn = mskp + msk_pitch * 2;
+    unsigned char *mskpnn = mskpn + msk_pitch * 2;
+    dstp += dst_pitch * ( 2 - field );
+    for( y = 2 - field; y < height - 1; y += 2 )
+    {
+        for( x = 1; x < width - 1; ++x )
+        {
+            if( dmskp[x] != 0xFF || 
+                ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue;  /* only pixels with no direction and 0xFF in the edge mask above or below */
+            int u = x - 1, back = 500, forward = -500;  /* 500 / -500 stay in place when no direction is found on that side */
+            while( u )  /* walk left to the nearest known direction; column 0 itself is never tested */
+            {
+                if( dmskp[u] != 0xFF ) 
+                { 
+                    back = dmskp[u]; 
+                    break; 
+                }
+                if( mskp[u] != 0xFF && mskpn[u] != 0xFF ) break;  /* edge mask holds 0xFF neither above nor below: stop */
+                --u;
+            }
+            int v = x + 1;
+            while( v < width )  /* walk right likewise; v ends up equal to width when nothing stops the walk */
+            {
+                if( dmskp[v] != 0xFF )
+                {
+                    forward = dmskp[v];
+                    break;
+                }
+                if( mskp[v] != 0xFF && mskpn[v] != 0xFF ) break;
+                ++v;
+            }
+            int tc = 1, bc = 1;  /* stay 1 only while every column of [u, v] passes the test for the row above (tc) / below (bc) */
+            int mint = 500, maxt = -20;  /* range of directions seen in the row above */
+            int minb = 500, maxb = -20;  /* range of directions seen in the row below */
+            for( j = u; j <= v; ++j )
+            {
+                if( tc )
+                {
+                    if( y <= 2 || dmskpp[j] == 0xFF || ( mskpp[j] != 0xFF && mskp[j] != 0xFF ) )
+                    {
+                        tc = 0;
+                        mint = maxt = 20;  /* collapses the range, so abs( mint - maxt ) becomes 0 */
+                    }
+                    else
+                    {
+                        if( dmskpp[j] < mint ) mint = dmskpp[j];
+                        if( dmskpp[j] > maxt ) maxt = dmskpp[j];
+                    }
+                }
+                if( bc )
+                {
+                    if( y >= height - 3 || dmskpn[j] == 0xFF || ( mskpn[j] != 0xFF && mskpnn[j] != 0xFF ) )
+                    {
+                        bc = 0;
+                        minb = maxb = 20;
+                    }
+                    else
+                    {
+                        if( dmskpn[j] < minb ) minb = dmskpn[j];
+                        if( dmskpn[j] > maxb ) maxb = dmskpn[j];
+                    }
+                }
+            }
+            if( maxt == -20 ) maxt = mint = 20;  /* range was never updated: collapse it as well */
+            if( maxb == -20 ) maxb = minb = 20;
+            int thresh = MAX(
+                            MAX( MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2, 8 ),
+                            MAX( abs( mint - maxt ), abs( minb - maxb ) ) );  /* largest accepted difference between the two end directions */
+            const int flim = MIN(
+                                MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2,
+                                6 );  /* longest gap filled without support from a neighbouring row */
+            if( abs( forward - back ) <= thresh && ( v - u - 1 <= flim || tc || bc ) )
+            {
+                double step = (double)( forward - back ) / (double)( v - u );
+                for( j = 0; j < v - u - 1; ++j )
+                    dstp[u+j+1] = back + (int)( j * step + 0.5 );  /* linear ramp starting at back, written to columns u+1 .. v-1 */
+            }
+        }
+        mskpp += msk_pitch * 2;
+        mskp += msk_pitch * 2;
+        mskpn += msk_pitch * 2;
+        mskpnn += msk_pitch * 2;
+        dmskpp += dmsk_pitch * 2;
+        dmskp += dmsk_pitch * 2;
+        dmskpn += dmsk_pitch * 2;
+        dstp += dst_pitch * 2;
+    }
+}
+
+/**
+ * Actually renders the output frame, based on the edge and edge direction masks
+ * @param plane The plane of the image being processed, to know to reduce a search distance for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
+ * @param dmskp Pointer to the edge direction mask; it is read and also updated in place with the direction finally used for each pixel
+ * @param dmsk_pitch Stride of dmskp
+ * @param dstp Pointer to the line-doubled source field being filtered in place
+ * @param dst_pitch Stride of dstp
+ * @param omskp Pointer to the edge mask used for post-processing (only read in this function)
+ * @param omsk_pitch Stride of omskp
+ * @param field Field to filter
+ * @param nt Noise threshold, (50 is a good default value)
+ * @param height Height of the full-frame output
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
+ */
+void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                                int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt,
+                                int height, int width )
+{
+    int x, y, u;
+    
+    if( field == 1 )  /* the loop below never writes the last row: presumably filled here from the row above it */
+    {
+        eedi2_bit_blit( dstp + ( height - 1 ) * dst_pitch,
+                  dst_pitch,
+                  dstp + ( height - 2 ) * dst_pitch,
+                  dst_pitch,
+                  width,
+                  1 );
+    }
+    else  /* the loop below never writes row 0: presumably filled here from row 1 */
+    {
+        eedi2_bit_blit( dstp,
+                  dst_pitch,
+                  dstp + dst_pitch,
+                  dst_pitch,
+                  width,
+                  1 );
+    }
+
+    dstp += dst_pitch * ( 1 - field );  /* existing row above the row being interpolated */
+    omskp += omsk_pitch * ( 1 - field );
+    unsigned char *dstpn = dstp + dst_pitch;  /* the row being interpolated */
+    unsigned char *dstpnn = dstp + dst_pitch * 2;  /* existing row below it */
+    unsigned char *omskn = omskp + omsk_pitch * 2;
+    dmskp += dmsk_pitch * ( 2 - field );
+    for( y = 2 - field; y < height - 1; y += 2 )
+    {
+        for( x = 0; x < width; ++x )  /* NOTE(review): x-1 / x+1 reads at the first and last column leave the bitmap row; presumably covered by stride padding, confirm */
+        {
+            int dir = dmskp[x];
+            const int lim = eedi2_limlut[abs(dir-128)>>2];
+            if( dir == 255 ||
+                ( abs( dmskp[x] - dmskp[x-1] ) > lim &&
+                  abs( dmskp[x] - dmskp[x+1] ) > lim ) )
+            {
+                dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;  /* no usable direction: plain vertical average */
+                if( dir != 255 ) dmskp[x] = 128;
+                continue;
+            }
+            if( lim < 9 )
+            {
+                const int sum =   dstp[x-1] +   dstp[x] +   dstp[x+1] +
+                                dstpnn[x-1] + dstpnn[x] + dstpnn[x+1];
+                const int sumsq = dstp[x-1] *   dstp[x-1] + 
+                                  dstp[x]   *   dstp[x]   +
+                                  dstp[x+1] *   dstp[x+1] +
+                                dstpnn[x-1] * dstpnn[x-1] +
+                                dstpnn[x]   * dstpnn[x]   +
+                                dstpnn[x+1] * dstpnn[x+1];
+                if( 6 * sumsq - sum * sum < 576 )  /* 36x the variance of the six pixels: the area is nearly flat */
+                {
+                    dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+                    dmskp[x] = 255;
+                    continue;
+                }
+            }
+            if( x > 1 && x < width - 2 &&  /* the bounds guard has to cover BOTH alternatives, since each reads x-2 and x+2 */
+                ( (   dstp[x] < MAX(   dstp[x-2],   dstp[x-1] ) - 3 &&
+                      dstp[x] < MAX(   dstp[x+2],   dstp[x+1] ) - 3 &&
+                    dstpnn[x] < MAX( dstpnn[x-2], dstpnn[x-1] ) - 3 &&
+                    dstpnn[x] < MAX( dstpnn[x+2], dstpnn[x+1] ) - 3 )
+                ||
+                (     dstp[x] > MIN(   dstp[x-2],   dstp[x-1] ) + 3 &&
+                      dstp[x] > MIN(   dstp[x+2],   dstp[x+1] ) + 3 &&
+                    dstpnn[x] > MIN( dstpnn[x-2], dstpnn[x-1] ) + 3 &&
+                    dstpnn[x] > MIN( dstpnn[x+2], dstpnn[x+1] ) + 3 ) ) )
+            {
+                dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+                dmskp[x] = 128;
+                continue;
+            }
+            dir = ( dir - 128 + 2 ) >> 2;  /* mask value back to a signed horizontal offset; stored again as 128 + dir * 4 below */
+            int val = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
+            const int startu = ( dir - 2 < 0 ) ?
+                        MAX( -x + 1, MAX( dir - 2, -width + 2 + x ) )
+                        :
+                        MIN(  x - 1, MIN( dir - 2,  width - 2 - x ) );
+            const int stopu =  ( dir + 2 < 0 ) ?
+                        MAX( -x + 1, MAX( dir + 2, -width + 2 + x ) )
+                        :
+                        MIN(  x - 1, MIN( dir + 2,  width - 2 - x ) );
+            int min = 8 * nt;  /* best difference so far; still 8 * nt afterwards means no offset was accepted */
+            for( u = startu; u <= stopu; ++u )
+            {
+                const int diff =
+                    abs(   dstp[x-1] - dstpnn[x-u-1] ) +
+                    abs(   dstp[x]   - dstpnn[x-u] )   +
+                    abs(   dstp[x+1] - dstpnn[x-u+1] ) + 
+                    abs( dstpnn[x-1] -   dstp[x+u-1] ) + 
+                    abs( dstpnn[x]   -   dstp[x+u] )   +
+                    abs( dstpnn[x+1] -   dstp[x+u+1] );
+                if( diff < min && 
+                    ( ( omskp[x-1+u] != 0xFF && abs( omskp[x-1+u] - dmskp[x] ) <= lim ) ||
+                     (  omskp[x+u]   != 0xFF && abs( omskp[x+u]   - dmskp[x]) <= lim )  ||
+                     (  omskp[x+1+u] != 0xFF && abs( omskp[x+1+u] - dmskp[x]) <= lim ) ) &&
+                    ( ( omskn[x-1-u] != 0xFF && abs( omskn[x-1-u] - dmskp[x]) <= lim ) ||
+                     (  omskn[x-u]   != 0xFF && abs( omskn[x-u]   - dmskp[x]) <= lim ) ||
+                     (  omskn[x+1-u] != 0xFF && abs( omskn[x+1-u] - dmskp[x]) <= lim ) ) )
+                {
+                    const int diff2 = 
+                        abs( dstp[x+(u>>1)-1] - dstpnn[x-(u>>1)-1] ) +
+                        abs( dstp[x+(u>>1)]   - dstpnn[x-(u>>1)]   ) +
+                        abs( dstp[x+(u>>1)+1] - dstpnn[x-(u>>1)+1] );
+                    if( diff2 < 4 * nt &&
+                        ( ( ( abs( omskp[x+(u>>1)] - omskn[x-(u>>1)]     ) <= lim ||
+                              abs( omskp[x+(u>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) && 
+                            omskp[x+(u>>1)] != 0xFF )
+                          || 
+                          ( ( abs( omskp[x+((u+1)>>1)] - omskn[x-(u>>1)] )     <= lim ||
+                              abs( omskp[x+((u+1)>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) && 
+                            omskp[x+((u+1)>>1)] != 0xFF ) ) ) 
+                    {
+                        if( ( abs( dmskp[x] - omskp[x+(u>>1)] )     <= lim ||
+                              abs( dmskp[x] - omskp[x+((u+1)>>1)] ) <= lim ) &&
+                            ( abs( dmskp[x] - omskn[x-(u>>1)] )     <= lim ||
+                              abs( dmskp[x] - omskn[x-((u+1)>>1)] ) <= lim ) )
+                        {
+                            val = (   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)] +
+                                    dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)] + 2 ) >> 2;
+                            min = diff;
+                            dir = u;
+                        }
+                    }
+                }
+            }
+            if( min != 8 * nt )
+            {
+                dstpn[x] = val;
+                dmskp[x] = 128 + dir * 4;
+            }
+            else  /* nothing accepted above: retry over a fixed +/- d window without consulting the masks */
+            {
+                const int minm = MIN( dstp[x], dstpnn[x] );
+                const int maxm = MAX( dstp[x], dstpnn[x] );
+                const int d = plane == 0 ? 4 : 2;
+                const int startu = MAX( -x + 1, -d );
+                const int stopu = MIN( width - 2 - x, d );
+                min = 7 * nt;
+                for( u = startu; u <= stopu; ++u )
+                {
+                    const int p1 =   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)];
+                    const int p2 = dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)];
+                    const int diff =
+                        abs(   dstp[x-1] - dstpnn[x-u-1] ) + 
+                        abs(   dstp[x]   - dstpnn[x-u] )   +
+                        abs(   dstp[x+1] - dstpnn[x-u+1] ) +
+                        abs( dstpnn[x-1] - dstp[x+u-1] )   + 
+                        abs( dstpnn[x]   - dstp[x+u] )     + 
+                        abs( dstpnn[x+1] - dstp[x+u+1] )   +
+                        abs( p1 - p2 );
+                    if( diff < min )
+                    {
+                        const int valt = ( p1 + p2 + 2 ) >> 2;
+                        if( valt >= minm && valt <= maxm )  /* accept only values between the pixels directly above and below */
+                        {
+                            val = valt;
+                            min = diff;
+                            dir = u;
+                        }
+                    }
+                }
+                dstpn[x] = val;
+                if( min == 7*nt ) dmskp[x] = 128;
+                else dmskp[x] = 128 + dir * 4;
+            }
+        }
+        dstp += dst_pitch * 2;
+        dstpn += dst_pitch * 2;
+        dstpnn += dst_pitch * 2;
+        dmskp += dmsk_pitch * 2;
+        omskp += omsk_pitch * 2;
+        omskn += omsk_pitch * 2;
+    }
+}
+
+/**
+ * Final clean-up pass: wherever the new and old direction masks disagree by more than the allowed limit, the interpolated pixel is replaced by the average of the pixels directly above and below it
+ * @param nmskp Pointer to the newly-filtered edge direction mask being read from
+ * @param nmsk_pitch Stride of nmskp
+ * @param omskp Pointer to the old unfiltered edge direction mask being read from
+ * @param omsk_pitch Stride of omskp
+ * @param dstp Pointer to the output image being filtered in place
+ * @param src_pitch Stride of dstp (the name is kept as is for callers)
+ * @param field Field to filter
+ * @param height Height of the full-frame output
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in src_pitch
+ */
+void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch,
+                         uint8_t * dstp, int src_pitch, int field, int height, int width )
+{
+    int row, col;
+
+    for( row = 2 - field; row < height - 1; row += 2 )
+    {
+        /* The rows of both masks and of the picture that line up with this output row */
+        const uint8_t *new_dir = nmskp + row * nmsk_pitch;
+        const uint8_t *old_dir = omskp + row * omsk_pitch;
+        uint8_t *cur = dstp + row * src_pitch;
+        const uint8_t *above = cur - src_pitch;
+        const uint8_t *below = cur + src_pitch;
+
+        for( col = 0; col < width; ++col )
+        {
+            const int new_val = new_dir[col];
+            const int old_val = old_dir[col];
+            /* Pixels whose old mask value is 255 or 128 are never touched */
+            if( old_val == 255 || old_val == 128 ) continue;
+            if( abs( new_val - old_val ) > eedi2_limlut[abs( new_val - 128 ) >> 2] )
+                cur[col] = ( above[col] + below[col] + 1 ) >> 1;
+        }
+    }
+}
+
+/**
+ * Blurs the source field plane with a separable 7-tap kernel: a horizontal pass from src into tmp, then a vertical pass from tmp into dst. The edge handling reads up to index 5, so width and height must both be at least 6.
+ * @param src Pointer to the half-height source field plane
+ * @param src_pitch Stride of src
+ * @param tmp Pointer to a temporary buffer for juggling bitmaps
+ * @param tmp_pitch Stride of tmp
+ * @param dst Pointer to the destination to store the blurred field plane
+ * @param dst_pitch Stride of dst
+ * @param height Height of the half-height field-sized frame
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
+ */
+void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst, int dst_pitch, int height, int width )
+{
+    uint8_t * srcp = src;
+    uint8_t * dstp = tmp;
+    int x, y;
+
+    for( y = 0; y < height; ++y )  /* horizontal pass: src -> tmp */
+    {
+        dstp[0] = ( srcp[3] * 582 + srcp[2] * 7078 + srcp[1] * 31724 +   /* near a border a missing tap's weight is added to its mirror tap, so weights still sum to 65536 */
+                    srcp[0] * 26152 + 32768 ) >> 16;
+        dstp[1] = ( srcp[4] * 582 + srcp[3] * 7078 +
+                    ( srcp[0] + srcp[2] ) * 15862 +
+                    srcp[1] * 26152 + 32768 ) >> 16;
+        dstp[2] = ( srcp[5] * 582 + ( srcp[0] + srcp[4] ) * 3539 +
+                    ( srcp[1] + srcp[3] ) * 15862 + 
+                    srcp[2]*26152 + 32768 ) >> 16;
+        for( x = 3; x < width - 3; ++x )
+        {
+            dstp[x] = ( ( srcp[x-3] + srcp[x+3] ) * 291 +
+                        ( srcp[x-2] + srcp[x+2] ) * 3539 +
+                        ( srcp[x-1] + srcp[x+1] ) * 15862 +
+                        srcp[x] * 26152 + 32768 ) >> 16;
+        }
+        dstp[x] = ( srcp[x-3] * 582 + ( srcp[x-2] + srcp[x+2] ) * 3539 +  /* x == width - 3 here */
+                    ( srcp[x-1] + srcp[x+1] ) * 15862 +
+                    srcp[x]   * 26152 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
+                    ( srcp[x-1] + srcp[x+1] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
+                    srcp[x-1] * 31724 + srcp[x] * 26152 + 32768 ) >> 16;
+        srcp += src_pitch;
+        dstp += tmp_pitch;
+    }
+    srcp = tmp;  /* vertical pass: tmp -> dst; from here on every source pointer walks tmp */
+    dstp = dst;
+    unsigned char *src3p = srcp - tmp_pitch * 3;  /* the "previous" rows start above the buffer and are only read once advanced into it */
+    unsigned char *src2p = srcp - tmp_pitch * 2;
+    unsigned char *srcpp = srcp - tmp_pitch;
+    unsigned char *srcpn = srcp + tmp_pitch;
+    unsigned char *src2n = srcp + tmp_pitch * 2;
+    unsigned char *src3n = srcp + tmp_pitch * 3;
+    for( x = 0; x < width; ++x )  /* output row 0 */
+    {
+        dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 + srcpn[x] * 31724 + 
+                     srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+    for( x = 0; x < width; ++x )  /* output row 1 */
+    {
+        dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+    for( x = 0; x < width; ++x )  /* output row 2 */
+    {
+        dstp[x] = ( src3n[x] * 582 + ( src2p[x] + src2n[x] ) * 3539 + 
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;  /* these seven pointers address tmp, so they advance by tmp_pitch, not src_pitch */
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+    for( y = 3; y < height - 3; ++y )  /* interior rows: full symmetric kernel */
+    {
+        for( x = 0; x < width; ++x )
+        {
+            dstp[x] = ( ( src3p[x] + src3n[x] ) * 291 +
+                        ( src2p[x] + src2n[x] ) * 3539 +
+                        ( srcpp[x] + srcpn[x] ) * 15862 +
+                        srcp[x] * 26152 + 32768 ) >> 16;
+        }
+        src3p += tmp_pitch;
+        src2p += tmp_pitch;
+        srcpp += tmp_pitch;
+        srcp += tmp_pitch;
+        srcpn += tmp_pitch;
+        src2n += tmp_pitch;
+        src3n += tmp_pitch;
+        dstp += dst_pitch;
+    }
+    for( x = 0; x < width; ++x )  /* output row height - 3 */
+    {
+        dstp[x] = ( src3p[x] * 582 + ( src2p[x] + src2n[x] ) *3539 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                    srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+    for( x = 0; x < width; ++x )  /* output row height - 2 */
+    {
+        dstp[x] = ( src3p[x] * 582 + src2p[x] * 7078 +
+                    ( srcpp[x] + srcpn[x] ) * 15862 +
+                     srcp[x] * 26152 + 32768 ) >> 16;
+    }
+    src3p += tmp_pitch;
+    src2p += tmp_pitch;
+    srcpp += tmp_pitch;
+    srcp += tmp_pitch;
+    srcpn += tmp_pitch;
+    src2n += tmp_pitch;
+    src3n += tmp_pitch;
+    dstp += dst_pitch;
+    for( x = 0; x < width; ++x )  /* output row height - 1 */
+    {
+        dstp[x] = ( src3p[x] * 582   + src2p[x] * 7078 +
+                    srcpp[x] * 31724 +  srcp[x] * 26152 + 32768 ) >> 16;
+    }
+}
+
+
+/**
+ * Blurs the spatial derivatives of the source field plane
+ * @param src Pointer to the derivative array to filter
+ * @param tmp Pointer to a temporary storage for the derivative array while it's being filtered
+ * @param dst Pointer to the destination to store the filtered output derivative array
+ * @param pitch Stride of the bitmap from which the src array is derived
+ * @param height Height of the half-height field-sized frame from which the src array derivs were taken
+ * @param width Width of the bitmap from which the src array is derived, as opposed to the padded stride in pitch
+ */
+void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch, int height, const int width )
+{
+    int * srcp = src;
+    int * dstp = tmp;
+    int x, y;
+    
+    for( y = 0; y < height; ++y )
+    {
+        x = 0;
+        dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902  + srcp[x+2] * 13618 +
+                    srcp[x+1] * 28830 + srcp[x]   * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902 + srcp[x+2] * 13618 + 
+                    ( srcp[x-1] + srcp[x+1] ) *14415 +
+                    srcp[x]   * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x]   * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x+4] * 678   + ( srcp[x-3] + srcp[x+3] ) * 1951 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x]   * 18508 + 32768 ) >> 16;
+
+        for( x = 4; x < width - 4; ++x )
+        {
+            dstp[x] = ( ( srcp[x-4] + srcp[x+4] ) * 339 + 
+                        ( srcp[x-3] + srcp[x+3] ) * 1951 + 
+                        ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                        ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                        srcp[x] * 18508 + 32768 ) >> 16;
+        }
+
+        dstp[x] = ( srcp[x-4] * 678 + ( srcp[x-3] + srcp[x+3] ) * 1951 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809  +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + 
+                    ( srcp[x-2] + srcp[x+2] ) * 6809 +
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 + 
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-4] * 678 + srcp[x+3] * 3902 + srcp[x-2] * 13618 + 
+                    ( srcp[x-1] + srcp[x+1] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        ++x;
+        dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + srcp[x-2] * 13618 + 
+                    srcp[x-1] * 28830 +
+                    srcp[x] * 18508 + 32768 ) >> 16;
+        srcp += pitch;
+        dstp += pitch;
+    }
+    dstp = dst;
+    srcp = tmp;
+    int * src4p = srcp - pitch * 4;
+    int * src3p = srcp - pitch * 3;
+    int * src2p = srcp - pitch * 2;
+    int * srcpp = srcp - pitch;
+    int * srcpn = srcp + pitch;
+    int * src2n = srcp + pitch * 2;
+    int * src3n = srcp + pitch * 3;
+    int * src4n = srcp + pitch * 4;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678   + src3n[x] * 3902  + 
+                    src2n[x] * 13618 + srcpn[x] * 28830 +
+                     srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + src2n[x] * 13618 + 
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + 
+                    ( src2p[x] + src2n[x] ) * 6809 + 
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4n[x] * 678 + ( src3p[x] + src3n[x] ) * 1951 +
+                    ( src2p[x] + src2n[x] ) * 6809 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( y = 4; y < height - 4; ++y )
+    {
+        for( x = 0; x < width; ++x )
+        {
+            dstp[x] = ( ( src4p[x] + src4n[x] ) * 339 +
+                        ( src3p[x] + src3n[x] ) * 1951 +
+                        ( src2p[x] + src2n[x] ) * 6809 +
+                        ( srcpp[x] + srcpn[x] ) * 14415 +
+                        srcp[x] * 18508 + 32768 ) >> 18;
+        }
+        src4p += pitch;
+        src3p += pitch;
+        src2p += pitch;
+        srcpp += pitch;
+        srcp += pitch;
+        srcpn += pitch;
+        src2n += pitch;
+        src3n += pitch;
+        src4n += pitch;
+        dstp += pitch;
+    }
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678 +
+                    ( src3p[x] + src3n[x] ) * 1951 +
+                    ( src2p[x] + src2n[x] ) * 6809 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 +
+                    ( src2p[x] + src2n[x] ) * 6809 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 + src2p[x] * 13618 +
+                    ( srcpp[x] + srcpn[x] ) * 14415 +
+                    srcp[x] * 18508 + 32768 ) >> 18;
+    }
+    src4p += pitch;
+    src3p += pitch;
+    src2p += pitch;
+    srcpp += pitch;
+    srcp += pitch;
+    srcpn += pitch;
+    src2n += pitch;
+    src3n += pitch;
+    src4n += pitch;
+    dstp += pitch;
+    for( x = 0; x < width; ++x )
+    {
+        dstp[x] = ( src4p[x] * 678   + src3p[x] * 3902 +
+                    src2p[x] * 13618 + srcpp[x] * 28830 +
+                    srcp[x]  * 18508 + 32768 ) >> 18;
+    }
+}
+
+/**
+ * Finds spatial derivatives for a source field plane: per pixel, Ix*Ix, Iy*Iy and Ix*Iy, each shifted right by 1
+ * @param srcp Pointer to the plane to derive
+ * @param src_pitch Stride of srcp; also used as the row stride (in ints) of x2, y2 and xy
+ * @param height Height of the half-height field-sized frame
+ * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
+ * @param x2 Pointer to the array to store the x/x derivatives
+ * @param y2 Pointer to the array to store the y/y derivatives
+ * @param xy Pointer to the array to store the x/y derivatives
+ */
+void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width, int *x2, int *y2, int *xy)
+{
+    
+    unsigned char * srcpp = srcp - src_pitch; // row above; only read after the pointers have advanced past the first row
+    unsigned char * srcpn = srcp + src_pitch; // row below
+    int x, y;
+    { // first row, first column: no left or upper neighbour, so both differences are one-sided
+        const int Ix = srcp[1] -  srcp[0];
+        const int Iy = srcp[0] - srcpn[0];
+        x2[0] = ( Ix * Ix ) >> 1;
+        y2[0] = ( Iy * Iy ) >> 1;
+        xy[0] = ( Ix * Iy ) >> 1;
+    }
+    for( x = 1; x < width - 1; ++x ) // first row, interior columns
+    {
+        const int Ix = srcp[x+1] -  srcp[x-1]; // difference across both horizontal neighbours
+        const int Iy = srcp[x]   - srcpn[x];   // current row against the row below only
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+    { // first row, last column: x == width - 1 here when width >= 2, so no right neighbour is read
+        const int Ix = srcp[x] -  srcp[x-1];
+        const int Iy = srcp[x] - srcpn[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+    srcpp += src_pitch; // step every source and output pointer down one row
+    srcp += src_pitch;
+    srcpn += src_pitch;
+    x2 += src_pitch;
+    y2 += src_pitch;
+    xy += src_pitch;
+    for( y = 1; y < height - 1; ++y ) // interior rows: Iy is taken between the rows above and below
+    {
+        { // first column
+            const int Ix =  srcp[1] -  srcp[0];
+            const int Iy = srcpp[0] - srcpn[0];
+            x2[0] = ( Ix * Ix ) >> 1;
+            y2[0] = ( Iy * Iy ) >> 1;
+            xy[0] = ( Ix * Iy ) >> 1;
+        }
+        for ( x = 1; x < width - 1; ++x ) // interior columns
+        {
+            const int Ix =  srcp[x+1] -  srcp[x-1];
+            const int Iy = srcpp[x]   - srcpn[x];
+            x2[x] = ( Ix * Ix ) >> 1;
+            y2[x] = ( Iy * Iy ) >> 1;
+            xy[x] = ( Ix * Iy ) >> 1;
+        }
+        { // last column
+            const int Ix =  srcp[x] -  srcp[x-1];
+            const int Iy = srcpp[x] - srcpn[x];
+            x2[x] = ( Ix *Ix ) >> 1;
+            y2[x] = ( Iy *Iy ) >> 1;
+            xy[x] = ( Ix *Iy ) >> 1;
+        }
+        srcpp += src_pitch;
+        srcp += src_pitch;
+        srcpn += src_pitch;
+        x2 += src_pitch;
+        y2 += src_pitch;
+        xy += src_pitch;
+    }
+    { // last row, first column: srcpn is not read from here on, Iy uses the row above and the current row
+        const int Ix =  srcp[1] - srcp[0];
+        const int Iy = srcpp[0] - srcp[0];
+        x2[0] = ( Ix * Ix ) >> 1;
+        y2[0] = ( Iy * Iy ) >> 1;
+        xy[0] = ( Ix * Iy ) >> 1;
+    }
+    for( x = 1; x < width - 1; ++x ) // last row, interior columns
+    {
+        const int Ix =  srcp[x+1] - srcp[x-1];
+        const int Iy = srcpp[x]   - srcp[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+    { // last row, last column
+        const int Ix =  srcp[x] - srcp[x-1];
+        const int Iy = srcpp[x] - srcp[x];
+        x2[x] = ( Ix * Ix ) >> 1;
+        y2[x] = ( Iy * Iy ) >> 1;
+        xy[x] = ( Ix * Iy ) >> 1;
+    }
+}
+
+/**
+ * Filters junctions and corners for the output image: where the corner measure exceeds 775, a pixel becomes the average of the pixels above and below it
+ * @param x2 Pointer to the x/x derivatives
+ * @param y2 Pointer to the y/y derivatives
+ * @param xy Pointer to the x/y derivatives
+ * @param pitch Stride of the source field plane from which the derivatives were calculated; used here as the row stride of x2, y2 and xy
+ * @param mskp Pointer to the edge direction mask
+ * @param msk_pitch Stride of mskp
+ * @param dstp Pointer to the output image being filtered in place
+ * @param dst_pitch Stride of dstp
+ * @param height Height of the full-frame output plane
+ * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
+ * @param field Field to filter; processing starts at row 8 - field and visits every second row
+ */
+void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, int height, int width, int field )
+{
+    mskp += ( 8 - field ) * msk_pitch; // skip to the first processed row of the mask
+    dstp += ( 8 - field ) * dst_pitch; // and to the same row of the output
+    unsigned char * dstpp = dstp - dst_pitch; // output row above
+    unsigned char * dstpn = dstp + dst_pitch; // output row below
+    x2 += pitch * 3; // derivative row 3; the x2n/y2n/xyn pointers below address row 4
+    y2 += pitch * 3;
+    xy += pitch * 3;
+    int *x2n = x2 + pitch;
+    int *y2n = y2 + pitch;
+    int *xyn = xy + pitch;
+    int x, y;
+    
+    for( y = 8 - field; y < height - 7; y += 2 ) // every second output row
+    {
+        for( x = 4; x < width - 4; ++x ) // the 4 leftmost and 4 rightmost columns are left untouched
+        {
+            if( mskp[x] == 255 || mskp[x] == 128 ) continue; // pixels with these two mask values are never modified
+            const int c1 = (int)( x2[x]  *  y2[x] -  xy[x] * xy[x] - 0.09 * // x2*y2 - xy^2 - 0.09*(x2 + y2)^2, truncated to int
+                                  ( x2[x]  + y2[x] )  * ( x2[x]  + y2[x] ) );
+            const int c2 = (int)( x2n[x] * y2n[x] - xyn[x]* xyn[x] - 0.09 * 
+                                  ( x2n[x] + y2n[x] ) * ( x2n[x] + y2n[x] ) ); // same measure on the next derivative row
+            if (c1 > 775 || c2 > 775)
+                dstp[x] = ( dstpp[x] + dstpn[x] + 1 ) >> 1; // rounded average of the pixels directly above and below
+        }
+        mskp += msk_pitch * 2;
+        dstpp += dst_pitch * 2;
+        dstp += dst_pitch * 2;
+        dstpn += dst_pitch * 2;
+        x2 += pitch; // derivative arrays advance one row for every two output rows
+        x2n += pitch;
+        y2 += pitch;
+        y2n += pitch;
+        xy += pitch;
+        xyn += pitch;
+    }
+}
diff --git a/libhb/eedi2.h b/libhb/eedi2.h
new file mode 100644
index 000000000..1df7b1138
--- /dev/null
+++ b/libhb/eedi2.h
@@ -0,0 +1,84 @@
+// Used to order a sequence of metrics for median filtering
+void eedi2_sort_metrics( int *order, const int length );
+
+// Aping some Windows API functions AviSynth seems to like
+// Taken from here: http://www.gidforums.com/t-8543.html
+void *eedi2_aligned_malloc(size_t size, size_t align_size);
+void eedi2_aligned_free(void *ptr);
+
+// Copies bitmaps
+void eedi2_bit_blit( uint8_t * dstp, int dst_pitch, const uint8_t * srcp, int src_pitch,
+                     int row_size, int height );
+
+// Sets up the initial field-sized bitmap EEDI2 interpolates from
+void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height );
+
+// Simple line doubler
+void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch );
+
+// Finds places where vertically adjacent pixels abruptly change intensity
+void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch,
+                            int mthresh, int lthresh, int vthresh, int height, int width );
+
+// Expands and smooths out the edge mask by considering a pixel
+// to be masked if >= dilation threshold adjacent pixels are masked.
+void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                             int dstr, int height, int width );
+
+// Contracts the edge mask by considering a pixel to be masked
+// only if > erosion threshold adjacent pixels are masked
+void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
+                            int estr, int height, int width );
+
+// Smooths out horizontally aligned holes in the mask
+// If none of the 6 horizontally adjacent pixels are masked,
+// don't consider the current pixel masked. If there are any
+// masked on both sides, consider the current pixel masked.
+void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, 
+                              int height, int width );
+
+// Spatial vectors. Looks at maximum_search_distance surrounding pixels
+// to guess which angle edges follow. This is EEDI2's timesink, and can be
+// thought of as YADIF_CHECK on steroids. Both find edge directions.
+void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
+                            uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width  );
+
+void eedi2_filter_map( uint8_t *mskp, int msk_pitch, uint8_t *dmskp, int dmsk_pitch,
+                       uint8_t * dstp, int dst_pitch, int height, int width );
+
+void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                           int dst_pitch, int height, int width );
+
+void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t  *dmskp, int dmsk_pitch, uint8_t * dstp,
+                           int dst_pitch, int height, int width );
+
+void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                               int dst_pitch, int tff, int height, int width );
+
+void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                              int dst_pitch, int field, int height, int width );
+
+void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                              int dst_pitch, int field, int height, int width );
+
+void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                         int dst_pitch, int field, int height, int width );
+
+void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
+                                int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt,
+                                int height, int width );
+
+void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch, uint8_t * dstp,
+                         int src_pitch, int field, int height, int width );
+
+void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst,
+                           int dst_pitch, int height, int width );
+                           
+void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch,
+                                const int height, const int width );
+                                
+void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width,
+                             int *x2, int *y2, int *xy);
+
+void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch,
+                                uint8_t * dstp, int dst_pitch, int height, int width, int field );