diff options
author | handbrake <[email protected]> | 2013-01-31 08:18:55 +0000 |
---|---|---|
committer | handbrake <[email protected]> | 2013-01-31 08:18:55 +0000 |
commit | bc3b56381c6ac214b7736a00972c9d0902902453 (patch) | |
tree | 6caffbe88ef2ee5efbd2279cf00556a986a7c22e | |
parent | 39821495c82b104b77e628d5cb244be041e285ed (diff) |
add a new compile option: --enable-hwd
change the GUI uvd checkbox's name to hardware decoder
modify the issue compile on linux
modify the issue on Intel platform with Intel integrate graphic
add a new opencl scale algorithm, PSNR goes up (added files: scale.h, scale.c, scale_kernel.h, scale_kernel.c)
merge the cropscaleaccl.c to cropscale.c
merge the decavcodecaccl.c to decavcodec.c
git-svn-id: svn://svn.handbrake.fr/HandBrake/branches/opencl@5227 b64f7644-9d1e-0410-96f1-a4d463321fa5
41 files changed, 2740 insertions, 1783 deletions
diff --git a/contrib/ffmpeg/module.defs b/contrib/ffmpeg/module.defs index 01fd5f753..435ce778b 100644 --- a/contrib/ffmpeg/module.defs +++ b/contrib/ffmpeg/module.defs @@ -20,11 +20,6 @@ FFMPEG.CONFIGURE.extra = \ --disable-network \ --disable-hwaccels \ --disable-encoders \ - --enable-dxva2 \ - --enable-hwaccel=h264_dxva2 \ - --enable-hwaccel=mpeg2_dxva2 \ - --enable-hwaccel=vc1_dxva2 \ - --enable-hwaccel=wmv3_dxva2 \ --enable-encoder=aac \ --enable-encoder=ac3 \ --enable-encoder=flac \ @@ -47,6 +42,11 @@ else ifeq (1-mingw,$(BUILD.cross)-$(BUILD.system)) FFMPEG.CONFIGURE.extra += \ --enable-w32threads \ --enable-memalign-hack \ + --enable-dxva2 \ + --enable-hwaccel=h264_dxva2 \ + --enable-hwaccel=mpeg2_dxva2 \ + --enable-hwaccel=vc1_dxva2 \ + --enable-hwaccel=wmv3_dxva2 \ --target-os=mingw32 \ --arch=i386 \ --enable-cross-compile --cross-prefix=$(BUILD.cross.prefix) diff --git a/libhb/common.c b/libhb/common.c index eccc00354..c10eae93d 100644 --- a/libhb/common.c +++ b/libhb/common.c @@ -1739,6 +1739,7 @@ static void job_setup( hb_job_t * job, hb_title_t * title ) job->list_attachment = hb_attachment_list_copy( title->list_attachment ); job->metadata = hb_metadata_copy( title->metadata ); + job->use_hw_decode = 0; } static void job_clean( hb_job_t * job ) @@ -1993,11 +1994,7 @@ hb_filter_object_t * hb_filter_init( int filter_id ) case HB_FILTER_CROP_SCALE: filter = &hb_filter_crop_scale; break; -#ifdef USE_OPENCL - case HB_FILTER_CROP_SCALE_ACCL: - filter = &hb_filter_crop_scale_accl; - break; -#endif + case HB_FILTER_ROTATE: filter = &hb_filter_rotate; break; @@ -2974,15 +2971,15 @@ int hb_use_dxva( hb_title_t * title ) int hb_get_gui_info(hb_gui_t * gui, int option) { if ( option == 1 ) - return gui->use_uvd; + return gui->use_hwd; else if ( option == 2 ) return gui->use_opencl; else return gui->title_scan; } -void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan) +void hb_set_gui_info(hb_gui_t *gui, int hwd, int opencl, int titlescan) { - 
gui->use_uvd = uvd; + gui->use_hwd = hwd; gui->use_opencl = opencl; gui->title_scan = titlescan; } diff --git a/libhb/common.h b/libhb/common.h index 3d5f38f15..8586d2a6b 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -148,10 +148,10 @@ int hb_subtitle_can_pass( int source, int mux ); hb_attachment_t *hb_attachment_copy(const hb_attachment_t *src); int hb_get_gui_info(hb_gui_t *gui, int option); -void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan); +void hb_set_gui_info(hb_gui_t *gui, int hwd, int opencl, int titlescan); struct hb_gui_s { - int use_uvd; + int use_hwd; int use_opencl; int title_scan; }; @@ -441,7 +441,8 @@ struct hb_job_s // initially (for frame accurate positioning // to non-I frames). int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */ - int use_uvd; + int use_hwd; + int use_hw_decode; #ifdef __LIBHB__ /* Internal data */ @@ -786,7 +787,7 @@ struct hb_title_s uint32_t flags; // set if video stream doesn't have IDR frames int opencl_support; - int uvd_support; + int hwd_support; #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 0) }; @@ -1086,9 +1087,7 @@ enum HB_FILTER_DENOISE, HB_FILTER_RENDER_SUB, HB_FILTER_CROP_SCALE, -#ifdef USE_OPENCL - HB_FILTER_CROP_SCALE_ACCL, -#endif + // Finally filters that don't care what order they are in, // except that they must be after the above filters HB_FILTER_ROTATE, diff --git a/libhb/cropscale.c b/libhb/cropscale.c index 61aa85101..e0a3e2af3 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -9,6 +9,8 @@ #include "hb.h" #include "hbffmpeg.h" +#include "common.h" + struct hb_filter_private_s { @@ -19,7 +21,13 @@ struct hb_filter_private_s int width_out; int height_out; int crop[4]; + +#ifdef USE_OPENCL int use_dxva; + int title_width; + int title_height; + hb_oclscale_t * os; //ocl scaler handler +#endif struct SwsContext * context; }; @@ -59,6 +67,16 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, pv->height_in = init->height; 
pv->width_out = init->width; pv->height_out = init->height; +#ifdef USE_OPENCL + pv->use_dxva = init->use_dxva; + if ( hb_get_gui_info(&hb_gui, 2) ) + { + pv->title_width = init->title_width; + pv->title_height = init->title_height; + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + } +#endif memcpy( pv->crop, init->crop, sizeof( int[4] ) ); if( filter->settings ) { @@ -95,6 +113,30 @@ static int hb_crop_scale_info( hb_filter_object_t * filter, info->out.height = pv->height_out; memcpy( info->out.crop, pv->crop, sizeof( int[4] ) ); +#ifdef USE_OPENCL + if ( hb_get_gui_info(&hb_gui, 2) ) + { + int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] ); + int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] ); + + sprintf( info->human_readable_desc, + "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", + pv->title_width, pv->title_height, + pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], + cropped_width, cropped_height, pv->width_out, pv->height_out ); + } + else + { + int cropped_width = pv->width_in - ( pv->crop[2] + pv->crop[3] ); + int cropped_height = pv->height_in - ( pv->crop[0] + pv->crop[1] ); + + sprintf( info->human_readable_desc, + "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", + pv->width_in, pv->height_in, + pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], + cropped_width, cropped_height, pv->width_out, pv->height_out ); + } +#else int cropped_width = pv->width_in - ( pv->crop[2] + pv->crop[3] ); int cropped_height = pv->height_in - ( pv->crop[0] + pv->crop[1] ); @@ -103,7 +145,7 @@ static int hb_crop_scale_info( hb_filter_object_t * filter, pv->width_in, pv->height_in, pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], cropped_width, cropped_height, pv->width_out, pv->height_out ); - +#endif return 0; } @@ -115,7 +157,23 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) { return; } - +#ifdef USE_OPENCL + if ( 
hb_get_gui_info(&hb_gui, 2) && pv->os) + { + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); + free( pv->os ); + } +#endif if ( pv->context ) { sws_freeContext( pv->context ); @@ -124,7 +182,24 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) free( pv ); filter->private_data = NULL; } - +#ifdef USE_OPENCL +static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) +{ + if( dstride == sstride ) + { + memcpy( dst, src, dstride * h ); + return dst + dstride * h; + } + int lbytes = dstride <= sstride ? dstride : sstride; + while( --h >= 0 ) + { + memcpy( dst, src, lbytes ); + src += sstride; + dst += dstride; + } + return dst; +} +#endif static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) { AVPicture pic_in; @@ -141,6 +216,76 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) av_picture_crop( &pic_crop, &pic_in, in->f.fmt, pv->crop[0], pv->crop[2] ); +#ifdef USE_OPENCL + if ( hb_get_gui_info(&hb_gui, 2) ) + { + int w = in->f.width - ( pv->crop[2] + pv->crop[3] ); + int h = in->f.height - ( pv->crop[0] + pv->crop[1] ); + uint8_t *tmp_in = malloc( w * h * 3 / 2 ); + uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 ); + if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] ) + { + int i; + for( i = 0; i< h>>1; i++ ) + { + memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 ); + 
memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 ); + } + } + else + { + memcpy( tmp_in, pic_crop.data[0], w * h ); + memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 ); + memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 ); + } + hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os ); + w = out->plane[0].stride; + h = out->plane[0].height; + uint8_t *dst = out->plane[0].data; + copy_plane( dst, tmp_out, w, pv->width_out, h ); + w = out->plane[1].stride; + h = out->plane[1].height; + dst = out->plane[1].data; + copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h ); + w = out->plane[2].stride; + h = out->plane[2].height; + dst = out->plane[2].data; + copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h ); + free( tmp_out ); + free( tmp_in ); + } + else + { + if ( !pv->context || + pv->width_in != in->f.width || + pv->height_in != in->f.height || + pv->pix_fmt != in->f.fmt ) + { + // Something changed, need a new scaling context. 
+ if( pv->context ) + sws_freeContext( pv->context ); + pv->context = hb_sws_get_context( + in->f.width - (pv->crop[2] + pv->crop[3]), + in->f.height - (pv->crop[0] + pv->crop[1]), + in->f.fmt, + out->f.width, out->f.height, out->f.fmt, + SWS_LANCZOS | SWS_ACCURATE_RND ); + pv->width_in = in->f.width; + pv->height_in = in->f.height; + pv->pix_fmt = in->f.fmt; + } + + // Scale pic_crop into pic_render according to the + // context set up above + sws_scale(pv->context, + (const uint8_t* const*)pic_crop.data, + pic_crop.linesize, + 0, in->f.height - (pv->crop[0] + pv->crop[1]), + pic_out.data, pic_out.linesize); + } +#else if ( !pv->context || pv->width_in != in->f.width || pv->height_in != in->f.height || @@ -168,7 +313,7 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) pic_crop.linesize, 0, in->f.height - (pv->crop[0] + pv->crop[1]), pic_out.data, pic_out.linesize); - +#endif out->s = in->s; hb_buffer_move_subs( out, in ); return out; @@ -202,6 +347,17 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); } +#ifdef USE_OPENCL + if ( (in->f.fmt == pv->pix_fmt_out && + !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && + in->f.width == pv->width_out && in->f.height == pv->height_out) || + (pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out) ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } +#else if ( in->f.fmt == pv->pix_fmt_out && !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && in->f.width == pv->width_out && in->f.height == pv->height_out ) @@ -210,13 +366,6 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, *buf_in = NULL; return HB_FILTER_OK; } -#ifdef USE_OPENCL - if ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } #endif 
*buf_out = crop_scale( pv, in ); diff --git a/libhb/cropscaleaccl.c b/libhb/cropscaleaccl.c deleted file mode 100644 index b254acde0..000000000 --- a/libhb/cropscaleaccl.c +++ /dev/null @@ -1,262 +0,0 @@ -/* cropscaleaccl.c - - Copyright (c) 2003-2012 HandBrake Team - This file is part of the HandBrake source code - Homepage: <http://handbrake.fr/>. - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> - Li Cao <[email protected]> <http://www.multicorewareinc.com/> - */ -#ifdef USE_OPENCL -#include "hb.h" -#include "common.h" - -struct hb_filter_private_s -{ - int width_in; - int height_in; - int pix_fmt; - int pix_fmt_out; - int width_out; - int height_out; - int crop[4]; - int use_dxva; - - int title_width; - int title_height; - hb_oclscale_t * os; //ocl scaler handler - struct SwsContext * context; -}; - -static int hb_crop_scale_init( hb_filter_object_t * filter, - hb_filter_init_t * init ); - -static int hb_crop_scale_work( hb_filter_object_t * filter, - hb_buffer_t ** buf_in, - hb_buffer_t ** buf_out ); - -static int hb_crop_scale_info( hb_filter_object_t * filter, - hb_filter_info_t * info ); - -static void hb_crop_scale_close( hb_filter_object_t * filter ); - -hb_filter_object_t hb_filter_crop_scale_accl = -{ - .id = HB_FILTER_CROP_SCALE_ACCL, - .enforce_order = 1, - .name = "Custom Crop and Scale", - .settings = NULL, - .init = hb_crop_scale_init, - .work = hb_crop_scale_work, - .close = hb_crop_scale_close, - .info = hb_crop_scale_info, -}; - -static int hb_crop_scale_init( hb_filter_object_t * filter, - hb_filter_init_t * init ) -{ - filter->private_data = calloc( 1, sizeof(struct hb_filter_private_s) ); - hb_filter_private_t * pv = filter->private_data; - - // TODO: add pix format option to settings - pv->pix_fmt_out = init->pix_fmt; - pv->width_in = init->width; - 
pv->height_in = init->height; - pv->width_out = init->width; - pv->height_out = init->height; - pv->use_dxva = init->use_dxva; - pv->title_width = init->title_width; - pv->title_height = init->title_height; - memcpy( pv->crop, init->crop, sizeof( int[4] ) ); - if( filter->settings ) - { - sscanf( filter->settings, "%d:%d:%d:%d:%d:%d", - &pv->width_out, &pv->height_out, - &pv->crop[0], &pv->crop[1], &pv->crop[2], &pv->crop[3] ); - } - // Set init values so the next stage in the pipline - // knows what it will be getting - init->pix_fmt = pv->pix_fmt; - init->width = pv->width_out; - init->height = pv->height_out; - memcpy( init->crop, pv->crop, sizeof( int[4] ) ); - pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); - memset( pv->os, 0, sizeof( hb_oclscale_t ) ); - return 0; -} - -static int hb_crop_scale_info( hb_filter_object_t * filter, - hb_filter_info_t * info ) -{ - hb_filter_private_t * pv = filter->private_data; - - if( !pv ) - return 0; - - // Set init values so the next stage in the pipline - // knows what it will be getting - memset( info, 0, sizeof( hb_filter_info_t ) ); - info->out.pix_fmt = pv->pix_fmt; - info->out.width = pv->width_out; - info->out.height = pv->height_out; - memcpy( info->out.crop, pv->crop, sizeof( int[4] ) ); - - int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] ); - int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] ); - - sprintf( info->human_readable_desc, - "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", - pv->title_width, pv->title_height, - pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], - cropped_width, cropped_height, pv->width_out, pv->height_out ); - - return 0; -} - -static void hb_crop_scale_close( hb_filter_object_t * filter ) -{ - hb_filter_private_t * pv = filter->private_data; - - if( !pv ) - { - return; - } - if ( pv->os ) - { - CL_FREE( pv->os->h_in_buf ); - CL_FREE( pv->os->h_out_buf ); - CL_FREE( pv->os->v_out_buf ); - CL_FREE( pv->os->h_coeff_y ); 
- CL_FREE( pv->os->h_coeff_uv ); - CL_FREE( pv->os->h_index_y ); - CL_FREE( pv->os->h_index_uv ); - CL_FREE( pv->os->v_coeff_y ); - CL_FREE( pv->os->v_coeff_uv ); - CL_FREE( pv->os->v_index_y ); - CL_FREE( pv->os->v_index_uv ); - free( pv->os ); - } - free( pv ); - filter->private_data = NULL; -} - -static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) -{ - if( dstride == sstride ) - { - memcpy( dst, src, dstride * h ); - return dst + dstride * h; - } - int lbytes = dstride <= sstride ? dstride : sstride; - while( --h >= 0 ) - { - memcpy( dst, src, lbytes ); - src += sstride; - dst += dstride; - } - return dst; -} - -static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) -{ - AVPicture pic_in; - AVPicture pic_out; - AVPicture pic_crop; - hb_buffer_t * out; - out = hb_video_buffer_init( pv->width_out, pv->height_out ); - - hb_avpicture_fill( &pic_in, in ); - hb_avpicture_fill( &pic_out, out ); - - // Crop; this alters the pointer to the data to point to the - // correct place for cropped frame - av_picture_crop( &pic_crop, &pic_in, in->f.fmt, - pv->crop[0], pv->crop[2] ); - - - int w = in->f.width - ( pv->crop[2] + pv->crop[3] ); - int h = in->f.height - ( pv->crop[0] + pv->crop[1] ); - uint8_t *tmp_in = malloc( w * h * 3 / 2 ); - uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 ); - if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] ) - { - int i; - for( i = 0; i< h>>1; i++ ) - { - memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w ); - memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w ); - memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 ); - memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 ); - } - } - else - { - memcpy( tmp_in, pic_crop.data[0], w * h 
); - memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 ); - memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 ); - } - hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os ); - w = out->plane[0].stride; - h = out->plane[0].height; - uint8_t *dst = out->plane[0].data; - copy_plane( dst, tmp_out, w, pv->width_out, h ); - w = out->plane[1].stride; - h = out->plane[1].height; - dst = out->plane[1].data; - copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h ); - w = out->plane[2].stride; - h = out->plane[2].height; - dst = out->plane[2].data; - copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h ); - free( tmp_out ); - free( tmp_in ); - out->s = in->s; - hb_buffer_move_subs( out, in ); - return out; -} - -static int hb_crop_scale_work( hb_filter_object_t * filter, - hb_buffer_t ** buf_in, - hb_buffer_t ** buf_out ) -{ - hb_filter_private_t * pv = filter->private_data; - hb_buffer_t * in = *buf_in; - - if( in->size <= 0 ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_DONE; - } - - if( !pv ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } - - // If width or height were not set, set them now based on the - // input width & height - if( pv->width_out <= 0 || pv->height_out <= 0 ) - { - pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); - pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); - } - if( ( in->f.fmt == pv->pix_fmt_out && - !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && - in->f.width == pv->width_out && in->f.height == pv->height_out ) || - ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } - *buf_out = crop_scale( pv, in ); - - - return HB_FILTER_OK; -} -#endif diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c index fe59300fe..43457720a 100644 --- 
a/libhb/decavcodec.c +++ b/libhb/decavcodec.c @@ -40,8 +40,13 @@ #include "hb.h" #include "hbffmpeg.h" +#include "audio_remap.h" #include "audio_resample.h" +#ifdef USE_HWD +#include "vadxva2.h" +#endif + static void compute_frame_duration( hb_work_private_t *pv ); static void flushDelayQueue( hb_work_private_t *pv ); static int decavcodecaInit( hb_work_object_t *, hb_job_t * ); @@ -99,7 +104,11 @@ struct hb_work_private_s int sws_pix_fmt; int cadence[12]; int wait_for_keyframe; - +#ifdef USE_HWD + hb_va_dxva2_t * dxva2; + uint8_t *dst_frame; + hb_oclscale_t *os; +#endif hb_audio_resample_t *resample; }; @@ -264,6 +273,32 @@ static void closePrivData( hb_work_private_t ** ppv ) hb_list_empty( &pv->list ); } hb_audio_resample_free(pv->resample); +#ifdef USE_HWD + if ( pv->os ) + { +#ifdef USE_OPENCL + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); +#endif + free( pv->os ); + } + if ( pv->dxva2 ) + { +#ifdef USE_OPENCL + CL_FREE( pv->dxva2->cl_mem_nv12 ); +#endif + hb_va_close( pv->dxva2 ); + } +#endif free( pv ); } *ppv = NULL; @@ -272,7 +307,9 @@ static void closePrivData( hb_work_private_t ** ppv ) static void decavcodecClose( hb_work_object_t * w ) { hb_work_private_t * pv = w->private_data; - +#ifdef USE_HWD + if( pv->dst_frame ) free( pv->dst_frame ); +#endif if ( pv ) { closePrivData( &pv ); @@ -501,6 +538,48 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) w = pv->job->title->width; h = pv->job->title->height; } +#ifdef USE_HWD + if (pv->dxva2 && pv->job) + { + hb_buffer_t *buf; + int ww, hh; + if( (w > pv->job->width || h > pv->job->height) && (hb_get_gui_info(&hb_gui, 2) == 1) ) + { + buf = hb_video_buffer_init( pv->job->width, 
pv->job->height ); + ww = pv->job->width; + hh = pv->job->height; + } + else + { + buf = hb_video_buffer_init( w, h ); + ww = w; + hh = h; + } + if( !pv->dst_frame ) + { + pv->dst_frame = malloc( ww * hh * 3 / 2 ); + } + if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR ) + { + hb_log( "hb_va_Extract failed!!!!!!" ); + } + w = buf->plane[0].stride; + h = buf->plane[0].height; + uint8_t *dst = buf->plane[0].data; + copy_plane( dst, pv->dst_frame, w, ww, h ); + w = buf->plane[1].stride; + h = buf->plane[1].height; + dst = buf->plane[1].data; + copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h ); + w = buf->plane[2].stride; + h = buf->plane[2].height; + dst = buf->plane[2].data; + copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h ); + return buf; + } + else + { +#endif hb_buffer_t *buf = hb_video_buffer_init( w, h ); uint8_t *dst = buf->data; @@ -547,10 +626,26 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) copy_plane( dst, frame->data[2], w, frame->linesize[2], h ); } return buf; +#ifdef USE_HWD +} +#endif } static int get_frame_buf( AVCodecContext *context, AVFrame *frame ) { +#ifdef USE_HWD + hb_work_private_t *pv = (hb_work_private_t*)context->opaque; + if ( (pv != NULL) && pv->dxva2 ) + { + int result = HB_WORK_ERROR; + hb_work_private_t *pv = (hb_work_private_t*)context->opaque; + result = hb_va_get_frame_buf( pv->dxva2, context, frame ); + if( result==HB_WORK_ERROR ) + return avcodec_default_get_buffer( context, frame ); + return 0; + } + else +#endif return avcodec_default_get_buffer( context, frame ); } @@ -763,7 +858,18 @@ static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequen { frame_dur += frame.repeat_pict * pv->field_duration; } - +#ifdef USE_HWD + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( avp.pts>0 ) + { + if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 ) + 
frame.pkt_pts = pv->dxva2->input_pts[0]; + else + frame.pkt_pts = pv->dxva2->input_pts[0]<pv->dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1]; + } + } +#endif // If there was no pts for this frame, assume constant frame rate // video & estimate the next frame time from the last & duration. double pts; @@ -949,6 +1055,24 @@ static hb_buffer_t *link_buf_list( hb_work_private_t *pv ) } return head; } +#ifdef USE_HWD +static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame ) +{ + hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque; + int i; + if( p_dec->dxva2 ) + { + hb_va_release( p_dec->dxva2, frame ); + } + else if( !frame->opaque ) + { + if( frame->type == FF_BUFFER_TYPE_INTERNAL ) + avcodec_default_release_buffer( p_context, frame ); + } + for( i = 0; i < 4; i++ ) + frame->data[i] = NULL; +} +#endif static void init_video_avcodec_context( hb_work_private_t *pv ) { @@ -956,6 +1080,10 @@ static void init_video_avcodec_context( hb_work_private_t *pv ) pv->context->opaque = pv; pv->context->get_buffer = get_frame_buf; pv->context->reget_buffer = reget_frame_buf; +#ifdef USE_HWD + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + pv->context->release_buffer = hb_ffmpeg_release_frame_buf; +#endif } static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) @@ -990,7 +1118,27 @@ static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) pv->context->workaround_bugs = FF_BUG_AUTODETECT; pv->context->err_recognition = AV_EF_CRCCHECK; pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; +#ifdef USE_HWD + if( ((w->codec_param==AV_CODEC_ID_H264) + || (w->codec_param==AV_CODEC_ID_MPEG2VIDEO) + || (w->codec_param==AV_CODEC_ID_VC1) + || (w->codec_param==AV_CODEC_ID_WMV3) + || (w->codec_param==AV_CODEC_ID_MPEG4)) + && pv->job && job->use_hw_decode) + { + pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param ); + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + 
hb_va_new_dxva2( pv->dxva2, pv->context ); + init_video_avcodec_context( pv ); + pv->context->get_format = hb_ffmpeg_get_format; + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + pv->threads = 1; + } + } +#endif if ( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) { hb_log( "decavcodecvInit: avcodec_open failed" ); @@ -1180,6 +1328,16 @@ static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in, pv->new_chap = in->s.new_chap; pv->chap_time = pts >= 0? pts : pv->pts_next; } +#ifdef USE_HWD + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[0] = pts; + else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[1] = pts; + pv->dxva2->input_dts = dts; + } +#endif decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); hb_buffer_close( &in ); *buf_out = link_buf_list( pv ); @@ -1421,7 +1579,19 @@ hb_work_object_t hb_decavcodecv = .info = decavcodecvInfo, .bsinfo = decavcodecvBSInfo }; - +#ifdef USE_HWD +hb_work_object_t hb_decavcodecv_accl = +{ + .id = WORK_DECAVCODECVACCL, + .name = "Video hardware decoder (libavcodec)", + .init = decavcodecvInit, + .work = decavcodecvWork, + .close = decavcodecClose, + .flush = decavcodecvFlush, + .info = decavcodecvInfo, + .bsinfo = decavcodecvBSInfo +}; +#endif static void decodeAudio(hb_audio_t *audio, hb_work_private_t *pv, uint8_t *data, int size, int64_t pts) { diff --git a/libhb/decavcodecaccl.c b/libhb/decavcodecaccl.c deleted file mode 100644 index 3842053ec..000000000 --- a/libhb/decavcodecaccl.c +++ /dev/null @@ -1,1311 +0,0 @@ -/* decavcodecaccl.c - - Copyright (c) 2003-2012 HandBrake Team - This file is part of the HandBrake source code - Homepage: <http://handbrake.fr/>. - It may be used under the terms of the GNU General Public License v2. 
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> - Li Cao <[email protected]> <http://www.multicorewareinc.com/> - - */ - -/* This module is Handbrake's interface to the ffmpeg decoder library - (libavcodec & small parts of libavformat). It contains four Handbrake - "work objects": - - decavcodeca connects HB to an ffmpeg audio decoder - decavcodecvaccl connects HB to an ffmpeg video decoder - - (Two different routines are needed because the ffmpeg library - has different decoder calling conventions for audio & video. - These work objects are self-contained & follow all - of HB's conventions for a decoder module. They can be used like - any other HB decoder (deca52, decmpeg2, etc.). - - These decoders handle 2 kinds of input. Streams that are demuxed - by HandBrake and streams that are demuxed by libavformat. In the - case of streams that are demuxed by HandBrake, there is an extra - parse step required that happens in decodeVideo and decavcodecaWork. - In the case of streams that are demuxed by libavformat, there is context - information that we need from the libavformat. This information is - propagated from hb_stream_open to these decoders through title->opaque_priv. - - A consequence of the above is that the streams that are demuxed by HandBrake - *can't* use information from the AVStream because there isn't one - they - get their data from either the dvd reader or the mpeg reader, not the ffmpeg - stream reader. That means that they have to make up for deficiencies in the - AVCodecContext info by using stuff kept in the HB "title" struct. It - also means that ffmpeg codecs that randomly scatter state needed by - the decoder across both the AVCodecContext & the AVStream (e.g., the - VC1 decoder) can't easily be used by the HB mpeg stream reader. 
- */ -#define HAVE_DXVA2 -#ifdef HAVE_DXVA2 -#include "hb.h" -#include "hbffmpeg.h" -#include "vadxva2.h" -#include "audio_remap.h" -#include "audio_resample.h" - -static void compute_frame_duration( hb_work_private_t *pv ); -static void flushDelayQueue( hb_work_private_t *pv ); - -#define HEAP_SIZE 8 -typedef struct { - // there are nheap items on the heap indexed 1..nheap (i.e., top of - // heap is 1). The 0th slot is unused - a marker is put there to check - // for overwrite errs. - int64_t h[HEAP_SIZE+1]; - int nheap; -} pts_heap_t; - -struct hb_work_private_s -{ - hb_job_t *job; - hb_title_t *title; - AVCodecContext *context; - AVCodecParserContext *parser; - int threads; - int video_codec_opened; - hb_list_t *list; - double duration; // frame duration (for video) - double field_duration; // field duration (for video) - int frame_duration_set; // Indicates valid timing was found in stream - double pts_next; // next pts we expect to generate - int64_t chap_time; // time of next chap mark (if new_chap != 0) - int new_chap; // output chapter mark pending - uint32_t nframes; - uint32_t ndrops; - uint32_t decode_errors; - int brokenByMicrosoft; // video stream may contain packed b-frames - hb_buffer_t* delayq[HEAP_SIZE]; - int queue_primed; - pts_heap_t pts_heap; - void* buffer; - struct SwsContext *sws_context; // if we have to rescale or convert color space - int sws_width; - int sws_height; - int sws_pix_fmt; - int cadence[12]; - int wait_for_keyframe; - hb_va_dxva2_t * dxva2; - uint8_t *dst_frame; - hb_oclscale_t *os; - hb_audio_resample_t *resample; -}; - -static hb_buffer_t *link_buf_list( hb_work_private_t *pv ); - - -static int64_t heap_pop( pts_heap_t *heap ) -{ - int64_t result; - - if( heap->nheap <= 0 ) - { - return -1; - } - - // return the top of the heap then put the bottom element on top, - // decrease the heap size by one & rebalence the heap. 
- result = heap->h[1]; - - int64_t v = heap->h[heap->nheap--]; - int parent = 1; - int child = parent << 1; - while( child <= heap->nheap ) - { - // find the smallest of the two children of parent - if (child < heap->nheap && heap->h[child] > heap->h[child+1] ) - ++child; - - if (v <= heap->h[child]) - // new item is smaller than either child so it's the new parent. - break; - - // smallest child is smaller than new item so move it up then - // check its children. - int64_t hp = heap->h[child]; - heap->h[parent] = hp; - parent = child; - child = parent << 1; - } - heap->h[parent] = v; - return result; -} - -static void heap_push( pts_heap_t *heap, int64_t v ) -{ - if ( heap->nheap < HEAP_SIZE ) - { - ++heap->nheap; - } - - // stick the new value on the bottom of the heap then bubble it - // up to its correct spot. - int child = heap->nheap; - while (child > 1) { - int parent = child >> 1; - if (heap->h[parent] <= v) - break; - // move parent down - int64_t hp = heap->h[parent]; - heap->h[child] = hp; - child = parent; - } - heap->h[child] = v; -} - -/*********************************************************************** - * Close - *********************************************************************** - * - **********************************************************************/ -static void closePrivData( hb_work_private_t ** ppv ) -{ - hb_work_private_t * pv = *ppv; - - if ( pv ) - { - flushDelayQueue( pv ); - - if ( pv->job && pv->context && pv->context->codec ) - { - hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops", - pv->context->codec->name, pv->nframes, pv->decode_errors, - pv->ndrops ); - } - if ( pv->sws_context ) - { - sws_freeContext( pv->sws_context ); - } - if ( pv->parser ) - { - av_parser_close(pv->parser); - } - if ( pv->context && pv->context->codec ) - { - hb_avcodec_close( pv->context ); - } - if ( pv->context ) - { - av_freep( &pv->context->extradata ); - av_free( pv->context ); - } - if ( pv->list ) - { - hb_list_empty( 
&pv->list ); - } - - hb_audio_resample_free( pv->resample ); - if ( pv->os ) - { -#ifdef USE_OPENCL - CL_FREE( pv->os->h_in_buf ); - CL_FREE( pv->os->h_out_buf ); - CL_FREE( pv->os->v_out_buf ); - CL_FREE( pv->os->h_coeff_y ); - CL_FREE( pv->os->h_coeff_uv ); - CL_FREE( pv->os->h_index_y ); - CL_FREE( pv->os->h_index_uv ); - CL_FREE( pv->os->v_coeff_y ); - CL_FREE( pv->os->v_coeff_uv ); - CL_FREE( pv->os->v_index_y ); - CL_FREE( pv->os->v_index_uv ); -#endif - free( pv->os ); - } - if ( pv->dxva2 ) - { - -#ifdef USE_OPENCL - CL_FREE( pv->dxva2->cl_mem_nv12 ); -#endif - hb_va_close( pv->dxva2 ); - } - free( pv ); - } - *ppv = NULL; -} - -/* ------------------------------------------------------------- - * General purpose video decoder using libavcodec - */ - -static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, - int h ) -{ - if ( dstride == sstride ) - { - memcpy( dst, src, dstride * h ); - return dst + dstride * h; - } - int lbytes = dstride <= sstride? dstride : sstride; - while ( --h >= 0 ) - { - memcpy( dst, src, lbytes ); - src += sstride; - dst += dstride; - } - return dst; -} - -// copy one video frame into an HB buf. If the frame isn't in our color space -// or at least one of its dimensions is odd, use sws_scale to convert/rescale it. -// Otherwise just copy the bits. -static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) -{ - AVCodecContext *context = pv->context; - int w, h; - if ( ! pv->job ) - { - // HandBrake's video pipeline uses yuv420 color. This means all - // dimensions must be even. So we must adjust the dimensions - // of incoming video if not even. 
- w = context->width & ~1; - h = context->height & ~1; - } - else - { - w = pv->job->title->width; - h = pv->job->title->height; - } - if( pv->dxva2 && pv->job ) - { - hb_buffer_t *buf; - int ww, hh; - if( (w > pv->job->width || h > pv->job->height) && (hb_get_gui_info(&hb_gui, 2) == 1) ) - { - buf = hb_video_buffer_init( pv->job->width, pv->job->height ); - ww = pv->job->width; - hh = pv->job->height; - } - else - { - buf = hb_video_buffer_init( w, h ); - ww = w; - hh = h; - } - if( !pv->dst_frame ) - { - pv->dst_frame = malloc( ww * hh * 3 / 2 ); - } - if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR ) - { - hb_log( "hb_va_Extract failed!!!!!!" ); - } - - w = buf->plane[0].stride; - h = buf->plane[0].height; - uint8_t *dst = buf->plane[0].data; - copy_plane( dst, pv->dst_frame, w, ww, h ); - w = buf->plane[1].stride; - h = buf->plane[1].height; - dst = buf->plane[1].data; - copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h ); - w = buf->plane[2].stride; - h = buf->plane[2].height; - dst = buf->plane[2].data; - copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h ); - return buf; - } - else - { - hb_buffer_t *buf = hb_video_buffer_init( w, h ); - uint8_t *dst = buf->data; - if( context->pix_fmt != AV_PIX_FMT_YUV420P || w != context->width || - h != context->height ) - { - // have to convert to our internal color space and/or rescale - AVPicture dstpic; - hb_avpicture_fill( &dstpic, buf ); - if( !pv->sws_context || - pv->sws_width != context->width || - pv->sws_height != context->height || - pv->sws_pix_fmt != context->pix_fmt ) - { - if( pv->sws_context ) - sws_freeContext( pv->sws_context ); - pv->sws_context = hb_sws_get_context( - context->width, context->height, context->pix_fmt, - w, h, PIX_FMT_YUV420P, - SWS_LANCZOS|SWS_ACCURATE_RND ); - pv->sws_width = context->width; - pv->sws_height = context->height; - pv->sws_pix_fmt = context->pix_fmt; - } - 
sws_scale( pv->sws_context, (const uint8_t*const*)frame->data, - frame->linesize, 0, context->height, - dstpic.data, dstpic.linesize ); - } - else - { - w = buf->plane[0].stride; - h = buf->plane[0].height; - dst = buf->plane[0].data; - copy_plane( dst, frame->data[0], w, frame->linesize[0], h ); - w = buf->plane[1].stride; - h = buf->plane[1].height; - dst = buf->plane[1].data; - copy_plane( dst, frame->data[1], w, frame->linesize[1], h ); - w = buf->plane[2].stride; - h = buf->plane[2].height; - dst = buf->plane[2].data; - copy_plane( dst, frame->data[2], w, frame->linesize[2], h ); - } - return buf; - } - -} - - -static int get_frame_buf( AVCodecContext *context, AVFrame *frame ) -{ - int result = HB_WORK_ERROR; - hb_work_private_t *pv = (hb_work_private_t*)context->opaque; - if( pv->dxva2 ) - { - result = hb_va_get_frame_buf( pv->dxva2, context, frame ); - } - if( result==HB_WORK_ERROR ) - return avcodec_default_get_buffer( context, frame ); - return 0; -} - -static int reget_frame_buf( AVCodecContext *context, AVFrame *frame ) -{ - return avcodec_default_reget_buffer( context, frame ); -} - -static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts ) -{ - hb_chapter_t *c; - - if ( !pv->job ) - return; - - c = hb_list_item( pv->job->list_chapter, chap_num - 1 ); - if ( c && c->title ) - { - hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64, - pv->context->codec->name, c->title, chap_num, pv->nframes, pts ); - } - else - { - hb_log( "%s: Chapter %d at frame %u time %"PRId64, - pv->context->codec->name, chap_num, pv->nframes, pts ); - } -} - -static void flushDelayQueue( hb_work_private_t *pv ) -{ - hb_buffer_t *buf; - int slot = pv->queue_primed ? 
pv->nframes & (HEAP_SIZE-1) : 0; - - // flush all the video packets left on our timestamp-reordering delay q - while ( ( buf = pv->delayq[slot] ) != NULL ) - { - buf->s.start = heap_pop( &pv->pts_heap ); - hb_list_add( pv->list, buf ); - pv->delayq[slot] = NULL; - slot = ( slot + 1 ) & (HEAP_SIZE-1); - } -} - -#define TOP_FIRST PIC_FLAG_TOP_FIELD_FIRST -#define PROGRESSIVE PIC_FLAG_PROGRESSIVE_FRAME -#define REPEAT_FIRST PIC_FLAG_REPEAT_FIRST_FIELD -#define TB 8 -#define BT 16 -#define BT_PROG 32 -#define BTB_PROG 64 -#define TB_PROG 128 -#define TBT_PROG 256 - -static void checkCadence( int * cadence, uint16_t flags, int64_t start ) -{ - /* Rotate the cadence tracking. */ - int i = 0; - for(i=11; i > 0; i--) - { - cadence[i] = cadence[i-1]; - } - - if ( !(flags & PROGRESSIVE) && !(flags & TOP_FIRST) ) - { - /* Not progressive, not top first... - That means it's probably bottom - first, 2 fields displayed. - */ - //hb_log("MPEG2 Flag: Bottom field first, 2 fields displayed."); - cadence[0] = BT; - } - else if ( !(flags & PROGRESSIVE) && (flags & TOP_FIRST) ) - { - /* Not progressive, top is first, - Two fields displayed. - */ - //hb_log("MPEG2 Flag: Top field first, 2 fields displayed."); - cadence[0] = TB; - } - else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) - { - /* Progressive, but noting else. - That means Bottom first, - 2 fields displayed. - */ - //hb_log("MPEG2 Flag: Progressive. Bottom field first, 2 fields displayed."); - cadence[0] = BT_PROG; - } - else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) - { - /* Progressive, and repeat. . - That means Bottom first, - 3 fields displayed. - */ - //hb_log("MPEG2 Flag: Progressive repeat. Bottom field first, 3 fields displayed."); - cadence[0] = BTB_PROG; - } - else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) - { - /* Progressive, top first. - That means top first, - 2 fields displayed. 
- */ - //hb_log("MPEG2 Flag: Progressive. Top field first, 2 fields displayed."); - cadence[0] = TB_PROG; - } - else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) - { - /* Progressive, top, repeat. - That means top first, - 3 fields displayed. - */ - //hb_log("MPEG2 Flag: Progressive repeat. Top field first, 3 fields displayed."); - cadence[0] = TBT_PROG; - } - - if ( (cadence[2] <= TB) && (cadence[1] <= TB) && (cadence[0] > TB) && (cadence[11]) ) - hb_log("%fs: Video -> Film", (float)start / 90000); - if ( (cadence[2] > TB) && (cadence[1] <= TB) && (cadence[0] <= TB) && (cadence[11]) ) - hb_log("%fs: Film -> Video", (float)start / 90000); -} - -/* - * Decodes a video frame from the specified raw packet data - * ('data', 'size', 'sequence'). - * The output of this function is stored in 'pv->list', which contains a list - * of zero or more decoded packets. - * - * The returned packets are guaranteed to have their timestamps in the correct - * order, even if the original packets decoded by libavcodec have misordered - * timestamps, due to the use of 'packed B-frames'. - * - * Internally the set of decoded packets may be buffered in 'pv->delayq' - * until enough packets have been decoded so that the timestamps can be - * correctly rewritten, if this is necessary. - */ -static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) -{ - hb_work_private_t *pv = w->private_data; - int got_picture, oldlevel = 0; - AVFrame frame = { { 0 } }; - AVPacket avp; - - if ( global_verbosity_level <= 1 ) - { - oldlevel = av_log_get_level(); - av_log_set_level( AV_LOG_QUIET ); - } - - av_init_packet(&avp); - avp.data = data; - avp.size = size; - avp.pts = pts; - avp.dts = dts; - /* - * libav avcodec_decode_video2() needs AVPacket flagged with AV_PKT_FLAG_KEY - * for some codecs. For example, sequence of PNG in a mov container. 
- */ - if ( frametype & HB_FRAME_KEY ) - { - avp.flags |= AV_PKT_FLAG_KEY; - } - - if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 ) - { - ++pv->decode_errors; - } - if ( global_verbosity_level <= 1 ) - { - av_log_set_level( oldlevel ); - } - if( got_picture && pv->wait_for_keyframe > 0 ) - { - // Libav is inconsistant about how it flags keyframes. For many - // codecs it simply sets frame.key_frame. But for others, it only - // sets frame.pict_type. And for yet others neither gets set at all - // (qtrle). - int key = frame.key_frame || (w->codec_param != AV_CODEC_ID_H264 && - (frame.pict_type == 0 || - frame.pict_type == AV_PICTURE_TYPE_I)); - if( !key ) - { - pv->wait_for_keyframe--; - return 0; - } - pv->wait_for_keyframe = 0; - } - if( got_picture ) - { - uint16_t flags = 0; - - // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES - // packet had a pts we handed it to av_parser_parse (if the packet had - // no pts we set it to AV_NOPTS_VALUE, but before the parse we can't - // distinguish between the start of a video frame with no pts & an - // intermediate packet of some frame which never has a pts). we hope - // that when parse returns the frame to us the pts we originally - // handed it will be in parser->pts. we put this pts into avp.pts so - // that when avcodec_decode_video finally gets around to allocating an - // AVFrame to hold the decoded frame, avcodec_default_get_buffer can - // stuff that pts into the it. if all of these relays worked at this - // point frame.pts should hold the frame's pts from the original data - // stream or AV_NOPTS_VALUE if it didn't have one. in the latter case - // we generate the next pts in sequence for it. 
- if ( !pv->frame_duration_set ) - compute_frame_duration( pv ); - - double frame_dur = pv->duration; - if ( frame.repeat_pict ) - { - frame_dur += frame.repeat_pict * pv->field_duration; - } - - - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - { - if( avp.pts>0 ) - { - if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 ) - frame.pkt_pts = pv->dxva2->input_pts[0]; - else - frame.pkt_pts = pv->dxva2->input_pts[0]<pv->dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1]; - } - } - // If there was no pts for this frame, assume constant frame rate - // video & estimate the next frame time from the last & duration. - double pts; - if (frame.pkt_pts == AV_NOPTS_VALUE) - { - pts = pv->pts_next; - } - else - { - pts = frame.pkt_pts; - } - pv->pts_next = pts + frame_dur; - - if ( frame.top_field_first ) - { - flags |= PIC_FLAG_TOP_FIELD_FIRST; - } - if ( !frame.interlaced_frame ) - { - flags |= PIC_FLAG_PROGRESSIVE_FRAME; - } - if ( frame.repeat_pict == 1 ) - { - flags |= PIC_FLAG_REPEAT_FIRST_FIELD; - } - if ( frame.repeat_pict == 2 ) - { - flags |= PIC_FLAG_REPEAT_FRAME; - } - - hb_buffer_t *buf; - - // if we're doing a scan or this content couldn't have been broken - // by Microsoft we don't worry about timestamp reordering - if ( ! pv->job || ! pv->brokenByMicrosoft ) - { - buf = copy_frame( pv, &frame ); - buf->s.start = pts; - buf->sequence = sequence; - - buf->s.flags = flags; - - if ( pv->new_chap && buf->s.start >= pv->chap_time ) - { - buf->s.new_chap = pv->new_chap; - log_chapter( pv, pv->new_chap, buf->s.start ); - pv->new_chap = 0; - pv->chap_time = 0; - } - else if ( pv->nframes == 0 && pv->job ) - { - log_chapter( pv, pv->job->chapter_start, buf->s.start ); - } - checkCadence( pv->cadence, flags, buf->s.start ); - hb_list_add( pv->list, buf ); - ++pv->nframes; - return got_picture; - } - - // XXX This following probably addresses a libavcodec bug but I don't - // see an easy fix so we workaround it here. 
- // - // The M$ 'packed B-frames' atrocity results in decoded frames with - // the wrong timestamp. E.g., if there are 2 b-frames the timestamps - // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6". - // The frames are actually delivered in the right order but with - // the wrong timestamp. To get the correct timestamp attached to - // each frame we have a delay queue (longer than the max number of - // b-frames) & a sorting heap for the timestamps. As each frame - // comes out of the decoder the oldest frame in the queue is removed - // and associated with the smallest timestamp. Then the new frame is - // added to the queue & its timestamp is pushed on the heap. - // This does nothing if the timestamps are correct (i.e., the video - // uses a codec that Micro$oft hasn't broken yet) but the frames - // get timestamped correctly even when M$ has munged them. - - // remove the oldest picture from the frame queue (if any) & - // give it the smallest timestamp from our heap. The queue size - // is a power of two so we get the slot of the oldest by masking - // the frame count & this will become the slot of the newest - // once we've removed & processed the oldest. 
- int slot = pv->nframes & (HEAP_SIZE-1); - if ( ( buf = pv->delayq[slot] ) != NULL ) - { - pv->queue_primed = 1; - buf->s.start = heap_pop( &pv->pts_heap ); - - if ( pv->new_chap && buf->s.start >= pv->chap_time ) - { - buf->s.new_chap = pv->new_chap; - log_chapter( pv, pv->new_chap, buf->s.start ); - pv->new_chap = 0; - pv->chap_time = 0; - } - else if ( pv->nframes == 0 && pv->job ) - { - log_chapter( pv, pv->job->chapter_start, buf->s.start ); - } - checkCadence( pv->cadence, buf->s.flags, buf->s.start ); - hb_list_add( pv->list, buf ); - } - - // add the new frame to the delayq & push its timestamp on the heap - buf = copy_frame( pv, &frame ); - buf->sequence = sequence; - /* Store picture flags for later use by filters */ - buf->s.flags = flags; - pv->delayq[slot] = buf; - heap_push( &pv->pts_heap, pts ); - - ++pv->nframes; - } - - return got_picture; -} -static void decodeVideo( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) -{ - hb_work_private_t *pv = w->private_data; - - /* - * The following loop is a do..while because we need to handle both - * data & the flush at the end (signaled by size=0). At the end there's - * generally a frame in the parser & one or more frames in the decoder - * (depending on the bframes setting). 
- */ - int pos = 0; - do { - uint8_t *pout; - int pout_len, len; - int64_t parser_pts, parser_dts; - if ( pv->parser ) - { - len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len, - data + pos, size - pos, pts, dts, 0 ); - parser_pts = pv->parser->pts; - parser_dts = pv->parser->dts; - } - else - { - pout = data; - len = pout_len = size; - parser_pts = pts; - parser_dts = dts; - } - pos += len; - - if ( pout_len > 0 ) - { - decodeFrame( w, pout, pout_len, sequence, parser_pts, parser_dts, frametype ); - } - } while ( pos < size ); - - /* the stuff above flushed the parser, now flush the decoder */ - if ( size <= 0 ) - { - while ( decodeFrame( w, NULL, 0, sequence, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0 ) ) - { - } - flushDelayQueue( pv ); - } -} - -/* - * Removes all packets from 'pv->list', links them together into - * a linked-list, and returns the first packet in the list. - */ -static hb_buffer_t *link_buf_list( hb_work_private_t *pv ) -{ - hb_buffer_t *head = hb_list_item( pv->list, 0 ); - - if ( head ) - { - hb_list_rem( pv->list, head ); - - hb_buffer_t *last = head, *buf; - - while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL ) - { - hb_list_rem( pv->list, buf ); - last->next = buf; - last = buf; - } - } - return head; -} -static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame ) -{ - hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque; - int i; - if( p_dec->dxva2 ) - { - hb_va_release( p_dec->dxva2, frame ); - } - else if( !frame->opaque ) - { - if( frame->type == FF_BUFFER_TYPE_INTERNAL ) - avcodec_default_release_buffer( p_context, frame ); - } - for( i = 0; i < 4; i++ ) - frame->data[i] = NULL; -} - -static void init_video_avcodec_context( hb_work_private_t *pv ) -{ - /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) 
*/ - pv->context->opaque = pv; - pv->context->get_buffer = get_frame_buf; - pv->context->reget_buffer = reget_frame_buf; - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - pv->context->release_buffer = hb_ffmpeg_release_frame_buf; -} - -static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) -{ - - hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) ); - - w->private_data = pv; - pv->wait_for_keyframe = 60; - pv->job = job; - if ( job ) - pv->title = job->title; - else - pv->title = w->title; - pv->list = hb_list_init(); - - if( pv->job && pv->job->title && !pv->job->title->has_resolution_change ) - { - pv->threads = HB_FFMPEG_THREADS_AUTO; - } - if ( pv->title->opaque_priv ) - { - AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; - AVCodec *codec = avcodec_find_decoder( w->codec_param ); - if ( codec == NULL ) - { - hb_log( "decavcodecvInit: failed to find codec for id (%d)", w->codec_param ); - return 1; - } - pv->context = avcodec_alloc_context3(codec); - avcodec_copy_context( pv->context, ic->streams[pv->title->video_id]->codec); - pv->context->workaround_bugs = FF_BUG_AUTODETECT; - pv->context->err_recognition = AV_EF_CRCCHECK; - pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; - if( ((w->codec_param==AV_CODEC_ID_H264) - || (w->codec_param==AV_CODEC_ID_MPEG2VIDEO) - || (w->codec_param==AV_CODEC_ID_VC1) - || (w->codec_param==AV_CODEC_ID_WMV3) - || (w->codec_param==AV_CODEC_ID_MPEG4)) - && pv->job ) - { - pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param ); - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - { - hb_va_new_dxva2( pv->dxva2, pv->context ); - init_video_avcodec_context( pv ); - pv->context->get_format = hb_ffmpeg_get_format; - pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); - memset( pv->os, 0, sizeof( hb_oclscale_t ) ); - pv->threads = 1; - - } - } - if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) - { - hb_log( "decavcodecvInit: avcodec_open failed" ); - 
return 1; - } - pv->video_codec_opened = 1; - // avi, mkv and possibly mp4 containers can contain the M$ VFW packed - // b-frames abortion that messes up frame ordering and timestamps. - // XXX ffmpeg knows which streams are broken but doesn't expose the - // info externally. We should patch ffmpeg to add a flag to the - // codec context for this but until then we mark all ffmpeg streams - // as suspicious. - pv->brokenByMicrosoft = 1; - } - else - { - AVCodec *codec = avcodec_find_decoder( w->codec_param ); - pv->parser = av_parser_init( w->codec_param ); - pv->context = avcodec_alloc_context3( codec ); - pv->context->workaround_bugs = FF_BUG_AUTODETECT; - pv->context->err_recognition = AV_EF_CRCCHECK; - pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; - init_video_avcodec_context( pv ); - } - return 0; -} - -static int next_hdr( hb_buffer_t *in, int offset ) -{ - uint8_t *dat = in->data; - uint16_t last2 = 0xffff; - for ( ; in->size - offset > 1; ++offset ) - { - if ( last2 == 0 && dat[offset] == 0x01 ) - // found an mpeg start code - return offset - 2; - - last2 = ( last2 << 8 ) | dat[offset]; - } - - return -1; -} - -static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type ) -{ - if ( in->size - offset < 4 ) - // not enough room for an mpeg start code - return -1; - - for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset ) - { - if ( in->data[offset+3] == hdr_type ) - // found it - break; - } - return offset; -} - -static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in ) -{ - hb_work_private_t *pv = w->private_data; - - // we can't call the avstream funcs but the read_header func in the - // AVInputFormat may set up some state in the AVContext. In particular - // vc1t_read_header allocates 'extradata' to deal with header issues - // related to Microsoft's bizarre engineering notions. We alloc a chunk - // of space to make vc1 work then associate the codec with the context. 
- if ( w->codec_param != AV_CODEC_ID_VC1 ) - { - // we haven't been inflicted with M$ - allocate a little space as - // a marker and return success. - pv->context->extradata_size = 0; - // av_malloc uses posix_memalign which is allowed to - // return NULL when allocating 0 bytes. We use extradata == NULL - // to trigger initialization of extradata and the decoder, so - // we can not set it to NULL here. So allocate a small - // buffer instead. - pv->context->extradata = av_malloc(1); - return 0; - } - - // find the start and and of the sequence header - int shdr, shdr_end; - if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 ) - { - // didn't find start of seq hdr - return 1; - } - if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 ) - { - shdr_end = in->size; - } - shdr_end -= shdr; - - // find the start and and of the entry point header - int ehdr, ehdr_end; - if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 ) - { - // didn't find start of entry point hdr - return 1; - } - if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 ) - { - ehdr_end = in->size; - } - ehdr_end -= ehdr; - - // found both headers - allocate an extradata big enough to hold both - // then copy them into it. 
- pv->context->extradata_size = shdr_end + ehdr_end; - pv->context->extradata = av_malloc(pv->context->extradata_size + 8); - memcpy( pv->context->extradata, in->data + shdr, shdr_end ); - memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end ); - memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8); - return 0; -} - -static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in, - hb_buffer_t ** buf_out ) -{ - hb_work_private_t *pv = w->private_data; - hb_buffer_t *in = *buf_in; - int64_t pts = AV_NOPTS_VALUE; - int64_t dts = pts; - - *buf_in = NULL; - *buf_out = NULL; - - /* if we got an empty buffer signaling end-of-stream send it downstream */ - if ( in->size == 0 ) - { - if ( pv->context->codec != NULL ) - { - decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); - } - hb_list_add( pv->list, in ); - *buf_out = link_buf_list( pv ); - return HB_WORK_DONE; - } - - // if this is the first frame open the codec (we have to wait for the - // first frame because of M$ VC1 braindamage). - if ( !pv->video_codec_opened ) - { - AVCodec *codec = avcodec_find_decoder( w->codec_param ); - if ( codec == NULL ) - { - hb_log( "decavcodecvWork: failed to find codec for id (%d)", w->codec_param ); - *buf_out = hb_buffer_init( 0 );; - return HB_WORK_DONE; - } - // Note that there is currently a small memory leak in libav at this - // point. pv->context->priv_data gets allocated by - // avcodec_alloc_context3(), then avcodec_get_context_defaults3() - // memsets the context and looses the pointer. - // - // avcodec_get_context_defaults3() looks as if they intended for - // it to preserve any existing priv_data because they test the pointer - // before allocating new memory, but the memset has already cleared it. - avcodec_get_context_defaults3( pv->context, codec ); - init_video_avcodec_context( pv ); - if ( setup_extradata( w, in ) ) - { - // we didn't find the headers needed to set up extradata. 
- // the codec will abort if we open it so just free the buf - // and hope we eventually get the info we need. - hb_buffer_close( &in ); - return HB_WORK_OK; - } - // disable threaded decoding for scan, can cause crashes - if ( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) - { - hb_log( "decavcodecvWork: avcodec_open failed" ); - *buf_out = hb_buffer_init( 0 );; - return HB_WORK_DONE; - } - pv->video_codec_opened = 1; - } - - if( in->s.start >= 0 ) - { - pts = in->s.start; - dts = in->s.renderOffset; - } - if ( in->s.new_chap ) - { - pv->new_chap = in->s.new_chap; - pv->chap_time = pts >= 0? pts : pv->pts_next; - } - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - { - if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] ) - pv->dxva2->input_pts[0] = pts; - else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] ) - pv->dxva2->input_pts[1] = pts; - pv->dxva2->input_dts = dts; - } - decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); - hb_buffer_close( &in ); - *buf_out = link_buf_list( pv ); - return HB_WORK_OK; -} - -static void compute_frame_duration( hb_work_private_t *pv ) -{ - double duration = 0.; - int64_t max_fps = 64L; - - // context->time_base may be in fields, so set the max *fields* per second - if ( pv->context->ticks_per_frame > 1 ) - max_fps *= pv->context->ticks_per_frame; - - if ( pv->title->opaque_priv ) - { - // If ffmpeg is demuxing for us, it collects some additional - // information about framerates that is often more accurate - // than context->time_base. - AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; - AVStream *st = ic->streams[pv->title->video_id]; - if ( st->nb_frames && st->duration ) - { - // compute the average frame duration from the total number - // of frames & the total duration. 
- duration = ( (double)st->duration * (double)st->time_base.num ) / - ( (double)st->nb_frames * (double)st->time_base.den ); - } - else - { - // XXX We don't have a frame count or duration so try to use the - // far less reliable time base info in the stream. - // Because the time bases are so screwed up, we only take values - // in the range 8fps - 64fps. - AVRational *tb = NULL; - if ( st->avg_frame_rate.den * 64L > st->avg_frame_rate.num && - st->avg_frame_rate.num > st->avg_frame_rate.den * 8L ) - { - tb = &(st->avg_frame_rate); - duration = (double)tb->den / (double)tb->num; - } - else if ( st->time_base.num * 64L > st->time_base.den && - st->time_base.den > st->time_base.num * 8L ) - { - tb = &(st->time_base); - duration = (double)tb->num / (double)tb->den; - } - else if ( st->r_frame_rate.den * 64L > st->r_frame_rate.num && - st->r_frame_rate.num > st->r_frame_rate.den * 8L ) - { - tb = &(st->r_frame_rate); - duration = (double)tb->den / (double)tb->num; - } - } - if ( !duration && - pv->context->time_base.num * max_fps > pv->context->time_base.den && - pv->context->time_base.den > pv->context->time_base.num * 8L ) - { - duration = (double)pv->context->time_base.num / - (double)pv->context->time_base.den; - if ( pv->context->ticks_per_frame > 1 ) - { - // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is - // field rate rather than frame rate so convert back to frames. - duration *= pv->context->ticks_per_frame; - } - } - } - else - { - if ( pv->context->time_base.num * max_fps > pv->context->time_base.den && - pv->context->time_base.den > pv->context->time_base.num * 8L ) - { - duration = (double)pv->context->time_base.num / - (double)pv->context->time_base.den; - if ( pv->context->ticks_per_frame > 1 ) - { - // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is - // field rate rather than frame rate so convert back to frames. 
- duration *= pv->context->ticks_per_frame; - } - } - } - if ( duration == 0 ) - { - // No valid timing info found in the stream, so pick some value - duration = 1001. / 24000.; - } - else - { - pv->frame_duration_set = 1; - } - pv->duration = duration * 90000.; - pv->field_duration = pv->duration; - if ( pv->context->ticks_per_frame > 1 ) - { - pv->field_duration /= pv->context->ticks_per_frame; - } -} - -static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info ) -{ - hb_work_private_t *pv = w->private_data; - - memset( info, 0, sizeof(*info) ); - - info->bitrate = pv->context->bit_rate; - // HandBrake's video pipeline uses yuv420 color. This means all - // dimensions must be even. So we must adjust the dimensions - // of incoming video if not even. - info->width = pv->context->width & ~1; - info->height = pv->context->height & ~1; - - info->pixel_aspect_width = pv->context->sample_aspect_ratio.num; - info->pixel_aspect_height = pv->context->sample_aspect_ratio.den; - - compute_frame_duration( pv ); - info->rate = 27000000; - info->rate_base = pv->duration * 300.; - - info->profile = pv->context->profile; - info->level = pv->context->level; - info->name = pv->context->codec->name; - - switch( pv->context->color_primaries ) - { - case AVCOL_PRI_BT709: - info->color_prim = HB_COLR_PRI_BT709; - break; - case AVCOL_PRI_BT470BG: - info->color_prim = HB_COLR_PRI_EBUTECH; - break; - case AVCOL_PRI_BT470M: - case AVCOL_PRI_SMPTE170M: - case AVCOL_PRI_SMPTE240M: - info->color_prim = HB_COLR_PRI_SMPTEC; - break; - default: - { - if( ( info->width >= 1280 || info->height >= 720 ) || - ( info->width > 720 && info->height > 576 ) ) - // ITU BT.709 HD content - info->color_prim = HB_COLR_PRI_BT709; - else if( info->rate_base == 1080000 ) - // ITU BT.601 DVD or SD TV content (PAL) - info->color_prim = HB_COLR_PRI_EBUTECH; - else - // ITU BT.601 DVD or SD TV content (NTSC) - info->color_prim = HB_COLR_PRI_SMPTEC; - break; - } - } - - switch( pv->context->color_trc ) 
- { - case AVCOL_TRC_SMPTE240M: - info->color_transfer = HB_COLR_TRA_SMPTE240M; - break; - default: - // ITU BT.601, BT.709, anything else - info->color_transfer = HB_COLR_TRA_BT709; - break; - } - - switch( pv->context->colorspace ) - { - case AVCOL_SPC_BT709: - info->color_matrix = HB_COLR_MAT_BT709; - break; - case AVCOL_SPC_FCC: - case AVCOL_SPC_BT470BG: - case AVCOL_SPC_SMPTE170M: - case AVCOL_SPC_RGB: // libswscale rgb2yuv - info->color_matrix = HB_COLR_MAT_SMPTE170M; - break; - case AVCOL_SPC_SMPTE240M: - info->color_matrix = HB_COLR_MAT_SMPTE240M; - break; - default: - { - if( ( info->width >= 1280 || info->height >= 720 ) || - ( info->width > 720 && info->height > 576 ) ) - // ITU BT.709 HD content - info->color_matrix = HB_COLR_MAT_BT709; - else - // ITU BT.601 DVD or SD TV content (PAL) - // ITU BT.601 DVD or SD TV content (NTSC) - info->color_matrix = HB_COLR_MAT_SMPTE170M; - break; - } - } - - return 1; -} - -static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf, - hb_work_info_t *info ) -{ - return 0; -} - -static void decavcodecvFlush( hb_work_object_t *w ) -{ - hb_work_private_t *pv = w->private_data; - - if ( pv->context->codec ) - { - flushDelayQueue( pv ); - hb_buffer_t *buf = link_buf_list( pv ); - hb_buffer_close( &buf ); - if ( pv->title->opaque_priv == NULL ) - { - pv->video_codec_opened = 0; - hb_avcodec_close( pv->context ); - av_freep( &pv->context->extradata ); - if ( pv->parser ) - { - av_parser_close(pv->parser); - } - pv->parser = av_parser_init( w->codec_param ); - } - else - { - avcodec_flush_buffers( pv->context ); - } - } - pv->wait_for_keyframe = 60; -} - -static void decavcodecClose( hb_work_object_t * w ) -{ - hb_work_private_t * pv = w->private_data; - if( pv->dst_frame ) free( pv->dst_frame ); - if( pv ) - { - closePrivData( &pv ); - w->private_data = NULL; - } -} - -hb_work_object_t hb_decavcodecv_accl = -{ - .id = WORK_DECAVCODECVACCL, - .name = "Video hardware decoder (libavcodec)", - .init = 
decavcodecvInit, - .work = decavcodecvWork, - .close = decavcodecClose, - .flush = decavcodecvFlush, - .info = decavcodecvInfo, - .bsinfo = decavcodecvBSInfo -}; - -#endif diff --git a/libhb/dxva2api.c b/libhb/dxva2api.c index 04011c0c5..004cd681a 100644 --- a/libhb/dxva2api.c +++ b/libhb/dxva2api.c @@ -10,6 +10,7 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ +#ifdef USE_HWD #include "dxva2api.h" __inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ ) @@ -34,3 +35,4 @@ __inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ) _fixed_.Value = HIWORD( _float_ * 0x10000 ); return _fixed_; } +#endif diff --git a/libhb/dxva2api.h b/libhb/dxva2api.h index dc5909477..a64d700fb 100644 --- a/libhb/dxva2api.h +++ b/libhb/dxva2api.h @@ -14,7 +14,7 @@ #ifndef _DXVA2API_H #define _DXVA2API_H - +#ifdef USE_HWD #define MINGW_DXVA2API_H_VERSION (2) #if __GNUC__ >=3 @@ -818,5 +818,5 @@ __inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ ); __inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha(); __inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ); - +#endif #endif //_DXVA2API_H diff --git a/libhb/hb.c b/libhb/hb.c index 63c6c6931..c1188caa2 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -484,7 +484,9 @@ hb_handle_t * hb_init( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); +#ifdef USE_HWD hb_register( &hb_decavcodecv_accl ); +#endif return h; } @@ -583,7 +585,9 @@ hb_handle_t * hb_init_dl( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); +#ifdef USE_HWD hb_register( &hb_decavcodecv_accl ); +#endif return h; } diff --git a/libhb/internal.h b/libhb/internal.h index 9262ff6f8..79ed86f89 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -428,7 +428,6 @@ extern hb_filter_object_t hb_filter_denoise; extern hb_filter_object_t hb_filter_decomb; extern hb_filter_object_t hb_filter_rotate; extern hb_filter_object_t 
hb_filter_crop_scale; -extern hb_filter_object_t hb_filter_crop_scale_accl; extern hb_filter_object_t hb_filter_render_sub; extern hb_filter_object_t hb_filter_vfr; diff --git a/libhb/module.defs b/libhb/module.defs index d3d409513..d679c68f2 100644 --- a/libhb/module.defs +++ b/libhb/module.defs @@ -40,10 +40,14 @@ endif ifeq (1,$(FEATURE.opencl)) LIBHB.GCC.D += USE_OPENCL endif +ifeq (1,$(FEATURE.hwd)) +LIBHB.GCC.D += USE_HWD +endif LIBHB.GCC.D += __LIBHB__ USE_PTHREAD LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include +ifeq (1,$(FEATURE.opencl)) LIBHB.GCC.I += $(AMDAPPSDKROOT)/include - +endif ifeq ($(BUILD.system),cygwin) LIBHB.GCC.D += SYS_CYGWIN else ifeq ($(BUILD.system),darwin) diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c index 8d49563bf..ee0f7661c 100644 --- a/libhb/oclnv12toyuv.c +++ b/libhb/oclnv12toyuv.c @@ -11,6 +11,7 @@ */ #ifdef USE_OPENCL +#ifdef USE_HWD #include "vadxva2.h" #include "oclnv12toyuv.h" @@ -220,3 +221,4 @@ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxv return 0; } #endif +#endif diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h index 3307b8efe..5098d805e 100644 --- a/libhb/oclnv12toyuv.h +++ b/libhb/oclnv12toyuv.h @@ -22,8 +22,8 @@ * nv12 to yuv interface * bufi is input frame of nv12, w is input frame width, h is input frame height */ -//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); +#ifdef USE_HWD int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); - +#endif #endif #endif diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h index 0ab3014ab..8b95ff234 100644 --- a/libhb/openclkernels.h +++ b/libhb/openclkernels.h @@ -119,4 +119,378 @@ char *kernel_src_nvtoyuv = KERNEL( }
);
+/* Stringified OpenCL kernel: full-filter horizontal scale of one planar
+ * 4:2:0 frame.  The source is laid out as the Y plane (srcStride rows,
+ * srcHeight of them) followed by the two chroma planes (srcChrStride).
+ * Each work-item filters one luma sample in each of four quadrants
+ * (w / w+chrWidth columns, top / bottom half rows) plus one U and one V
+ * sample, using precomputed taps (yfilter/yfilterPos for luma,
+ * cfilter/cfilterPos for chroma).  Results are renormalised by >>7 and
+ * saturated to the 15-bit intermediate range consumed by the vertical
+ * pass.
+ * NOTE(review): the single loop runs yfilterSize iterations for the
+ * chroma taps as well -- confirm cfilterSize == yfilterSize on the host
+ * side, otherwise val5/val6 read past the intended chroma taps. */
+char *kernel_src_hscaleall = KERNEL(
+
+ kernel void hscale_all_opencl(
+ global short *dst,
+ const global unsigned char *src,
+ const global short *yfilter,
+ const global int *yfilterPos,
+ int yfilterSize,
+ const global short *cfilter,
+ const global int *cfilterPos,
+ int cfilterSize,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ /* Global work size is a quarter frame, so the global extents double
+ * as the half-width/half-height offsets used below. */
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+
+ /* Read positions: four luma quadrants, then U, then V. */
+ int srcPos1 = h * srcStride + yfilterPos[w];
+ int srcPos2 = h * srcStride + yfilterPos[w + chrWidth];
+ int srcPos3 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w];
+ int srcPos4 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w + chrWidth];
+ int srcc1Pos = srcStride * srcHeight + (h) * (srcChrStride) + cfilterPos[w];
+ int srcc2Pos = srcc1Pos + ((srcChrStride)*(chrHeight));
+
+ int val1 = 0;
+ int val2 = 0;
+ int val3 = 0;
+ int val4 = 0;
+ int val5 = 0;
+ int val6 = 0;
+
+ int filterPos1 = yfilterSize * w;
+ int filterPos2 = yfilterSize * (w + chrWidth);
+ int cfilterPos1 = cfilterSize * w;
+
+ int j;
+ /* Accumulate the FIR taps for all six output samples at once. */
+ for (j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1 + j] * yfilter[filterPos1+ j];
+ val2 += src[srcPos2 + j] * yfilter[filterPos2 + j];
+ val3 += src[srcPos3 + j] * yfilter[filterPos1 + j];
+ val4 += src[srcPos4 + j] * yfilter[filterPos2 + j];
+ val5 += src[srcc1Pos+j] * cfilter[cfilterPos1 + j];
+ val6 += src[srcc2Pos+j] * cfilter[cfilterPos1 + j];
+ }
+ int dstPos1 = h *dstStride;
+ int dstPos2 = (h + chrHeight) * dstStride;
+
+ /* >>7, then saturate at the 15-bit intermediate maximum. */
+ dst[dstPos1 + w] = ((val1 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val1 >> 7));
+ dst[dstPos1 + w + chrWidth] = ((val2 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val2 >> 7));
+ dst[dstPos2 + w] = ((val3 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val3 >> 7));
+ dst[dstPos2 + w + chrWidth] = ((val4 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val4 >> 7));
+
+ /* Chroma planes live after the luma plane in the destination. */
+ int dstPos3 = h * (dstChrStride) + w + dstStride*dstHeight;
+ int dstPos4 = h * (dstChrStride) + w + dstStride*dstHeight + ((dstChrStride)*chrHeight);
+ dst[dstPos3] = ((val5 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val5 >> 7));
+ dst[dstPos4] = ((val6 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val6 >> 7));
+ }
+ );
+
+/* Stringified OpenCL kernel: "fast" (bilinear, no tap table) horizontal
+ * scale.  Each work-item bilinearly interpolates one luma sample in each
+ * of four quadrants plus one U and one V sample, replicating the last
+ * source column once the fixed-point position runs past srcWidth-1.
+ * xInc/chrXInc are 16.16 fixed-point source steps per destination pixel.
+ * NOTE(review): xpos1 and xpos2 are initialised to 0 and never advanced
+ * by xInc, so xx and xalpha are always 0 for the luma taps -- this looks
+ * unfinished; compare with the xpos = chrXInc * w chroma path below.
+ * Confirm against the host-side fast-path setup before relying on it. */
+char *kernel_src_hscalefast = KERNEL(
+
+ kernel void hscale_fast_opencl(
+ global short *dst,
+ const global unsigned char *src,
+ int xInc,
+ int chrXInc,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ int xpos1 = 0;
+ int xpos2 = 0;
+ /* 16.16 fixed point: integer part selects the column, the top 7 bits
+ * of the fraction are the blend weight. */
+ int xx = xpos1 >> 16;
+ int xalpha = (xpos1 & 0xFFFF) >> 9;
+ dst[h * dstStride + w] = (src[h * srcStride + xx] << 7) + (src[h * srcStride + xx + 1] -src[h * srcStride + xx]) * xalpha;
+ int lowpart = h + (chrHeight);
+ dst[lowpart * dstStride + w] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;
+
+ /* Edge replication for the left-half columns. */
+ int inv_i = w * xInc >> 16;
+ if( inv_i >= srcWidth - 1)
+ {
+ dst[h*dstStride + w] = src[h*srcStride + srcWidth-1]*128;
+ dst[lowpart*dstStride + w] = src[lowpart*srcStride + srcWidth - 1] * 128;
+ }
+
+ int rightpart = w + (chrWidth);
+ xx = xpos2 >> 16;
+ xalpha = (xpos2 & 0xFFFF) >> 9;
+ dst[h * dstStride + rightpart] = (src[h *srcStride + xx] << 7) + (src[h * srcStride + xx + 1] - src[h * srcStride + xx]) * xalpha;
+ dst[lowpart * dstStride + rightpart] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;
+ inv_i = rightpart * xInc >> 16;
+ if( inv_i >= srcWidth - 1)
+ {
+ dst[h*dstStride + rightpart] = src[h*srcStride + srcWidth - 1] * 128;
+ dst[lowpart*dstStride + rightpart] = src[lowpart * srcStride + srcWidth - 1] * 128;
+ }
+
+ /* U plane: advance base pointers past the luma plane. */
+ int xpos = 0;
+ xpos = chrXInc * w;
+ xx = xpos >> 16;
+ xalpha = (xpos & 0xFFFF) >> 9;
+ src += srcStride * srcHeight;
+ dst += dstStride * dstHeight;
+ dst[h*(dstChrStride) + w] = (src[h * (srcChrStride) + xx] *(xalpha^127) + src[h * (srcChrStride) + xx + 1] * xalpha);
+ inv_i = w * xInc >> 16;
+ if( inv_i >= (srcWidth >> 1) - 1)
+ {
+ dst[h*(dstChrStride) + w] = src[h*(srcChrStride) + (srcWidth >> 1) -1]*128;
+ }
+
+ /* V plane: advance past the U plane. */
+ xpos = chrXInc * (w);
+ xx = xpos >> 16;
+ src += srcChrStride * srcHeight >> 1;
+ dst += (dstChrStride * chrHeight);
+ dst[h*(dstChrStride) + w] = (src[h * (srcChrStride) + xx]*(xalpha^127) + src[h * (srcChrStride) + xx + 1 ] * xalpha);
+
+ if( inv_i >= (srcWidth >> 1) - 1)
+ {
+ //v channel:
+ dst[h*(dstChrStride) + w] = src[h*(srcChrStride)+ (srcWidth >> 1) -1]*128;
+ }
+
+
+ }
+ );
+
+/* Stringified OpenCL kernel: full-filter vertical scale with an 8x8
+ * ordered-dither matrix.  Consumes the 15-bit intermediate produced by
+ * the horizontal pass and writes final 8-bit planar output.  Each
+ * work-item produces four luma quadrant samples plus one U and one V
+ * sample; the per-position dither offset is added (<<12) before the
+ * >>19 renormalisation and a branchless clamp to 0..255.
+ * NOTE(review): the chroma accumulators (val5/val6) index cfilter with
+ * filterPos1 = h * yfilterSize and loop yfilterSize times -- confirm the
+ * chroma filter table uses the same stride/size as the luma one. */
+char *kernel_src_vscalealldither = KERNEL(
+
+ kernel void vscale_all_dither_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global short *yfilter,
+ int yfilterSize,
+ const global short *cfilter,
+ int cfilterSize,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ /* Ordered-dither offsets centred around 64 (see swscale's
+ * dither_8x8_128 table). */
+ const unsigned char hb_dither_8x8_128[8][8] = {
+ { 36, 68, 60, 92, 34, 66, 58, 90, },
+ { 100, 4, 124, 28, 98, 2, 122, 26, },
+ { 52, 84, 44, 76, 50, 82, 42, 74, },
+ { 116, 20, 108, 12, 114, 18, 106, 10, },
+ { 32, 64, 56, 88, 38, 70, 62, 94, },
+ { 96, 0, 120, 24, 102, 6, 126, 30, },
+ { 48, 80, 40, 72, 54, 86, 46, 78, },
+ { 112, 16, 104, 8, 118, 22, 110, 14, },
+ };
+
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ /* Select the dither rows for the top and bottom output rows. */
+ local_up_dither = hb_dither_8x8_128[h & 7];
+ local_down_dither = hb_dither_8x8_128[(h + chrHeight) & 7];
+
+ //yscale;
+ int srcPos1 = (yfilterPos[h]) * srcStride + w;
+ int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);
+ int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;
+ int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;
+ int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);
+ int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;
+
+ /* Seed accumulators with the dither offset scaled to filter space. */
+ int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;
+ int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);
+ int val3 = (local_down_dither[w &7] << 12);
+ int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);
+ int val5 = (local_up_dither[w & 7] << 12);
+ int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chroma channel.
+
+ int j;
+ int filterPos1 = h * yfilterSize;
+ int filterPos2 = ( h + chrHeight ) * yfilterSize;
+ for(j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1] * yfilter[filterPos1 + j];
+ srcPos1 += srcStride;
+ val2 += src[srcPos2] * yfilter[filterPos1 + j];
+ srcPos2 += srcStride;
+ val3 += src[srcPos3] * yfilter[filterPos2 + j];
+ srcPos3 += srcStride;
+ val4 += src[srcPos4] * yfilter[filterPos2 + j];
+ srcPos4 += srcStride;
+ val5 += src[src1Pos] * cfilter[filterPos1 + j];
+ val6 += src[src2Pos] * cfilter[filterPos1 + j];
+ src1Pos += dstChrStride;
+ src2Pos += dstChrStride;
+ }
+ /* >>19 back to 8 bits, branchless clamp to 0..255. */
+ dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));
+ dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));
+ dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));
+ dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));
+
+ int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);
+ int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;
+ dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));
+ dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));
+ }
+ );
+
+/* Stringified OpenCL kernel: full-filter vertical scale without dither.
+ * Identical structure to vscale_all_dither_opencl, but the accumulator
+ * seed is a flat rounding bias of 64 (hb_sws_pb_64) instead of a
+ * position-dependent dither matrix.  Produces final 8-bit planar output
+ * via the same >>19 renormalisation and branchless 0..255 clamp.
+ * NOTE(review): same chroma-filter indexing caveat as the dither
+ * variant (cfilter addressed with h * yfilterSize); also a harmless
+ * stray double semicolon on the fourth luma store below. */
+char *kernel_src_vscaleallnodither = KERNEL(
+
+ kernel void vscale_all_nodither_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global short *yfilter,
+ int yfilterSize,
+ const global short *cfilter,
+ int cfilterSize,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ /* Constant rounding bias (half of the <<7 headroom). */
+ const unsigned char hb_sws_pb_64[8] = {
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_sws_pb_64;
+ local_down_dither = hb_sws_pb_64;
+
+
+ //yscale;
+ int srcPos1 = (yfilterPos[h]) * srcStride + w;
+ int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);
+ int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;
+ int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;
+ int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);
+ int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;
+
+ int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;
+ int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);
+ int val3 = (local_down_dither[w &7] << 12);
+ int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);
+ int val5 = (local_up_dither[w & 7] << 12);
+ int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chroma channel.
+
+
+ int j;
+ int filterPos1 = h * yfilterSize;
+ int filterPos2 = ( h + chrHeight ) * yfilterSize;
+ for(j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1] * yfilter[filterPos1 + j];
+ srcPos1 += srcStride;
+ val2 += src[srcPos2] * yfilter[filterPos1 + j];
+ srcPos2 += srcStride;
+ val3 += src[srcPos3] * yfilter[filterPos2 + j];
+ srcPos3 += srcStride;
+ val4 += src[srcPos4] * yfilter[filterPos2 + j];
+ srcPos4 += srcStride;
+ val5 += src[src1Pos] * cfilter[filterPos1 + j];
+ val6 += src[src2Pos] * cfilter[filterPos1 + j];
+ src1Pos += dstChrStride;
+ src2Pos += dstChrStride;
+ }
+ /* >>19 back to 8 bits, branchless clamp to 0..255. */
+ dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));
+ dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));
+ dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));
+ dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));;
+
+ int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);
+ int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;
+ dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));
+ dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));
+ }
+ );
+
+/* Stringified OpenCL kernel: "fast" vertical scale -- picks the nearest
+ * source row per yfilterPos/cfilterPos, adds a constant +64 rounding
+ * bias, shifts >>7 back to 8 bits, and applies the branchless 0..255
+ * clamp.  Handles four luma quadrant samples plus one U and one V
+ * sample per work-item.
+ * NOTE(review): all reads index the source with dstStride/dstChrStride,
+ * i.e. the intermediate buffer from the horizontal pass is assumed to be
+ * laid out with the destination strides -- confirm against host setup. */
+char *kernel_src_vscalefast = KERNEL(
+
+ kernel void vscale_fast_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ /* Flat rounding bias (no dithering in the fast path). */
+ const unsigned char hb_sws_pb_64[8] = {
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_sws_pb_64;
+ local_down_dither = hb_sws_pb_64;
+
+
+ int rightpart = w + chrWidth;
+ int bh = h + chrHeight; // bottom part
+ /* Luma: nearest row + bias, >>7, clamp to 0..255. */
+ short val1 = (src[(yfilterPos[h]) * dstStride + w] + local_up_dither[(w + 0) & 7]) >> 7; //lum offset is 0;
+ short val2 = (src[(yfilterPos[h]) * dstStride + rightpart] + local_up_dither[rightpart & 7]) >> 7;
+ short val3 = (src[(yfilterPos[bh]) * dstStride + w] + local_down_dither[w & 7]) >> 7;
+ short val4 = (src[(yfilterPos[bh]) * dstStride + rightpart] + local_down_dither[rightpart & 7]) >> 7;
+ dst[h * dstStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+ dst[h * dstStride + rightpart] = ((val2&(~0xFF)) ? ((-val2) >> 31) : (val2));
+ dst[bh * dstStride + w] = ((val3&(~0xFF)) ? ((-val3) >> 31) : (val3));
+ dst[bh * dstStride + rightpart] = ((val4&(~0xFF)) ? ((-val4) >> 31) : (val4));
+
+ /* U plane: advance base pointers past the luma plane. */
+ src += dstStride * srcHeight;
+ dst += dstStride * dstHeight;
+ val1 = (src[cfilterPos[h] * (dstChrStride) + w] + local_up_dither[ w & 7]) >> 7;
+ dst[h * (dstChrStride) + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+
+ /* V plane: advance past the U plane. */
+ src += dstChrStride * (srcHeight >> 1);
+ dst += dstChrStride * chrHeight;
+ val1 = (src[cfilterPos[h] * dstChrStride + w] + local_up_dither[ (w + 3) & 7] ) >> 7;
+ dst[h * dstChrStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+
+ }
+ );
+
#endif
diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c index a10e9d64d..2371c2167 100644 --- a/libhb/openclwrapper.c +++ b/libhb/openclwrapper.c @@ -99,6 +99,7 @@ int hb_regist_opencl_kernel() ADD_KERNEL_CFG( 0, "frame_h_scale", NULL )
ADD_KERNEL_CFG( 1, "frame_v_scale", NULL )
ADD_KERNEL_CFG( 2, "nv12toyuv", NULL )
+ ADD_KERNEL_CFG( 3, "scale_opencl", NULL )
return 0;
}
@@ -331,7 +332,7 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] ))
{
- hb_log( "Notice: Unable to write opencl kernel, writing to tempory directory instead." );
+ hb_log( "Notice: Unable to write opencl kernel, writing to temporary directory instead." );
//printf( "opencl-wrapper: write binary[%s] failds\n", fileName);
return 0;
} //else
@@ -450,8 +451,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) if( status != CL_SUCCESS )
{
- hb_log( "Notice: No more platform vendor info.\n" );
- return(1);
+ continue;
}
gpu_info->platform = platforms[i];
@@ -470,8 +470,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) if( status != CL_SUCCESS )
{
- hb_log( "Notice: No available GPU device.\n" );
- return(1);
+ continue;
}
if( numDevices )
@@ -482,6 +481,12 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) }
if( NULL == gpu_info->platform )
{
+ hb_log( "Notice: No OpenCL-compatible GPU found.\n" );
+ return(1);
+ }
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "Notice: No OpenCL-compatible GPU found.\n" );
return(1);
}
@@ -659,14 +664,18 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, if( status == 0 )
return(0);
#else
- int kernel_src_size = strlen( kernel_src_hscale )+strlen( kernel_src_vscale )+strlen( kernel_src_nvtoyuv );
- source_str = (char*)malloc( kernel_src_size+2 );
+ int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast );
+ source_str = (char*)malloc( kernel_src_size + 2 );
strcpy( source_str, kernel_src_hscale );
strcat( source_str, kernel_src_vscale );
strcat( source_str, kernel_src_nvtoyuv );
+ strcat( source_str, kernel_src_hscaleall );
+ strcat( source_str, kernel_src_hscalefast );
+ strcat( source_str, kernel_src_vscalealldither );
+ strcat( source_str, kernel_src_vscaleallnodither );
+ strcat( source_str, kernel_src_vscalefast );
#endif
-
source = source_str;
source_size[0] = strlen( source );
@@ -678,7 +687,8 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, sizeof(numDevices),
&numDevices,
NULL );
- if( status != CL_SUCCESS ){
+ if( status != CL_SUCCESS )
+ {
hb_log( "Notice: Unable to get the number of devices in context.\n" );
return 0;
}
@@ -730,7 +740,6 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, gpu_info->programs[idx] = clCreateProgramWithSource(
gpu_info->context, 1, &source, source_size, &status );
}
-
if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
hb_log( "Notice: Unable to get list of devices in context.\n" );
return(0);
@@ -931,4 +940,31 @@ int hb_get_opencl_env() }
return status;
}
+
+
+/* Allocate an OpenCL buffer of `size` bytes on the global gpu_env
+ * context; `flags` is passed through as the cl_mem_flags argument.
+ * The handle is stored in *cl_Buf.  Returns 1 on success, 0 on failure
+ * (the OpenCL status code is printed).
+ * NOTE(review): errors go to printf; the rest of this file reports via
+ * hb_log -- consider unifying. */
+int hb_create_buffer(cl_mem *cl_Buf,int flags,int size)
+{
+ int status;
+ *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );
+
+ if( status != CL_SUCCESS )
+ {
+ printf("clCreateBuffer error '%d'\n\n",status);
+ return 0;
+ }
+ return 1;
+}
+
+/* Blocking read of `size` bytes from cl_inBuf into outbuf on the global
+ * command queue (CL_TRUE makes clEnqueueReadBuffer wait for completion).
+ * Returns 1 on success, 0 on failure.
+ * NOTE(review): the error message says "av_read_opencl_buffer" but this
+ * function is hb_read_opencl_buffer -- message text looks stale. */
+int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size)
+{
+ int status;
+
+ status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0);
+ if( status != CL_SUCCESS )
+ {
+ printf("av_read_opencl_buffer error '%d'\n",status);
+ return 0;
+ }
+ return 1;
+}
#endif
diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h index 933e7a3b3..c4d96f1e9 100644 --- a/libhb/openclwrapper.h +++ b/libhb/openclwrapper.h @@ -13,6 +13,7 @@ */
#ifndef __OPENCL_WRAPPER_H
#define __OPENCL_WRAPPER_H
+#ifdef USE_OPENCL
#include "common.h"
//support AMD opencl
@@ -21,24 +22,20 @@ typedef struct _KernelEnv
{
-#ifdef USE_OPENCL
cl_context context;
cl_command_queue command_queue;
cl_program program;
cl_kernel kernel;
-#endif
char kernel_name[150];
int isAMD;
}KernelEnv;
typedef struct _OpenCLEnv
{
-#ifdef USE_OPENCL
cl_platform_id platform;
cl_context context;
cl_device_id devices;
cl_command_queue command_queue;
-#endif
}OpenCLEnv;
@@ -76,4 +73,10 @@ int hb_create_kernel( char * kernelname, KernelEnv * env ); int hb_release_kernel( KernelEnv * env );
int hb_get_opencl_env();
+
+int hb_create_buffer(cl_mem *cl_Buf,int flags,int size);
+
+int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size);
+
+#endif
#endif
diff --git a/libhb/scale.c b/libhb/scale.c new file mode 100644 index 000000000..27adcba32 --- /dev/null +++ b/libhb/scale.c @@ -0,0 +1,1199 @@ +#ifdef USE_OPENCL
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hb.h"
+#include "scale.h"
+#include "scale_kernel.h"
+#include "libavutil/pixdesc.h"
+
+/* Pixel-format classification helpers for the OpenCL scaler, mirroring
+ * libswscale's isRGBinInt/isBGRinInt/isAnyRGB/isGray macros.  Each
+ * expands to a boolean expression over an AV_PIX_FMT_* value. */
+
+/* True for packed-RGB-in-integer formats (incl. mono black/white). */
+#define isScaleRGBinInt(x) \
+ ( \
+ (x)==AV_PIX_FMT_RGB48BE || \
+ (x)==AV_PIX_FMT_RGB48LE || \
+ (x)==AV_PIX_FMT_RGB32 || \
+ (x)==AV_PIX_FMT_RGB32_1 || \
+ (x)==AV_PIX_FMT_RGB24 || \
+ (x)==AV_PIX_FMT_RGB565BE || \
+ (x)==AV_PIX_FMT_RGB565LE || \
+ (x)==AV_PIX_FMT_RGB555BE || \
+ (x)==AV_PIX_FMT_RGB555LE || \
+ (x)==AV_PIX_FMT_RGB444BE || \
+ (x)==AV_PIX_FMT_RGB444LE || \
+ (x)==AV_PIX_FMT_RGB8 || \
+ (x)==AV_PIX_FMT_RGB4 || \
+ (x)==AV_PIX_FMT_RGB4_BYTE || \
+ (x)==AV_PIX_FMT_MONOBLACK || \
+ (x)==AV_PIX_FMT_MONOWHITE \
+ )
+/* True for packed-BGR-in-integer formats (incl. mono black/white). */
+#define isScaleBGRinInt(x) \
+ ( \
+ (x)==AV_PIX_FMT_BGR48BE || \
+ (x)==AV_PIX_FMT_BGR48LE || \
+ (x)==AV_PIX_FMT_BGR32 || \
+ (x)==AV_PIX_FMT_BGR32_1 || \
+ (x)==AV_PIX_FMT_BGR24 || \
+ (x)==AV_PIX_FMT_BGR565BE || \
+ (x)==AV_PIX_FMT_BGR565LE || \
+ (x)==AV_PIX_FMT_BGR555BE || \
+ (x)==AV_PIX_FMT_BGR555LE || \
+ (x)==AV_PIX_FMT_BGR444BE || \
+ (x)==AV_PIX_FMT_BGR444LE || \
+ (x)==AV_PIX_FMT_BGR8 || \
+ (x)==AV_PIX_FMT_BGR4 || \
+ (x)==AV_PIX_FMT_BGR4_BYTE|| \
+ (x)==AV_PIX_FMT_MONOBLACK|| \
+ (x)==AV_PIX_FMT_MONOWHITE \
+ )
+
+/* True for any RGB- or BGR-in-integer format. */
+#define isScaleAnyRGB(x) \
+ ( \
+ isScaleRGBinInt(x) || \
+ isScaleBGRinInt(x) \
+ )
+
+/* True for grayscale (luma-only) formats. */
+#define isScaleGray(x) \
+ ((x) == AV_PIX_FMT_GRAY8 || \
+ (x) == AV_PIX_FMT_Y400A || \
+ (x) == AV_PIX_FMT_GRAY16BE || \
+ (x) == AV_PIX_FMT_GRAY16LE)
+
+/* File-global scaler context. NOTE(review): a single static context
+ * means one concurrent scale at a time -- confirm callers serialize. */
+static ScaleContext *g_scale;
+
+/* Evaluate the cubic-spline coefficient polynomial
+ * a + b*dist + c*dist^2 + d*dist^3 for dist <= 1.0; for larger
+ * distances, recurse with the coefficients rebased onto the next
+ * spline segment (same scheme as libswscale's getSplineCoeff). */
+static double getScaleSplineCoeff(double a, double b, double c, double d,
+ double dist)
+{
+ if (dist <= 1.0)
+ return ((d * dist + c) * dist + b) * dist + a;
+ else
+ return getScaleSplineCoeff(0.0,
+ b + 2.0 * c + 3.0 * d,
+ c + 3.0 * d,
+ -b - 3.0 * c - 6.0 * d,
+ dist - 1.0);
+}
+
+static int initScaleFilter(int16_t **outFilter, int32_t **filterPos,
+ int *outFilterSize, int xInc, int srcW, int dstW,
+ int filterAlign, int one, int flags, int cpu_flags,
+ ScaleVector *srcFilter, ScaleVector *dstFilter,
+ double param[2])
+{
+ int i;
+ int filterSize;
+ int filter2Size;
+ int minFilterSize;
+ int64_t *filter = NULL;
+ int64_t *filter2 = NULL;
+ const int64_t fone = 1LL << 54;
+ int ret = -1;
+
+ *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos));
+ if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) {
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ if (FFABS(xInc - 0x10000) < 10) { // unscaled
+ int i;
+ filterSize = 1;
+ // FF_ALLOCZ_OR_GOTO(NULL, filter,dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize);
+ if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) {
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+
+ for (i = 0; i < dstW; i++) {
+ filter[i * filterSize] = fone;
+ (*filterPos)[i] = i;
+ }
+ } else if (flags & SWS_POINT) { // lame looking point sampling mode
+ int i;
+ int64_t xDstInSrc;
+ filterSize = 1;
+ //FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc / 2 - 0x8000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
+
+ (*filterPos)[i] = xx;
+ filter[i] = fone;
+ xDstInSrc += xInc;
+ }
+ } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
+ (flags & SWS_FAST_BILINEAR)) { // bilinear upscale
+ int i;
+ int64_t xDstInSrc;
+ filterSize = 2;
+ //FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc / 2 - 0x8000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
+ int j;
+
+ (*filterPos)[i] = xx;
+ // bilinear upscale / linear interpolate / area averaging
+ for (j = 0; j < filterSize; j++) {
+ int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
+ if (coeff < 0)
+ coeff = 0;
+ filter[i * filterSize + j] = coeff;
+ xx++;
+ }
+ xDstInSrc += xInc;
+ }
+ } else {
+ int64_t xDstInSrc;
+ int sizeFactor;
+
+ if (flags & SWS_BICUBIC)
+ sizeFactor = 4;
+ else if (flags & SWS_X)
+ sizeFactor = 8;
+ else if (flags & SWS_AREA)
+ sizeFactor = 1; // downscale only, for upscale it is bilinear
+ else if (flags & SWS_GAUSS)
+ sizeFactor = 8; // infinite ;)
+ else if (flags & SWS_LANCZOS)
+ sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
+ else if (flags & SWS_SINC)
+ sizeFactor = 20; // infinite ;)
+ else if (flags & SWS_SPLINE)
+ sizeFactor = 20; // infinite ;)
+ else if (flags & SWS_BILINEAR)
+ sizeFactor = 2;
+ else {
+ sizeFactor = 0; // GCC warning killer
+ assert(0);
+ }
+
+ if (xInc <= 1 << 16)
+ filterSize = 1 + sizeFactor; // upscale
+ else
+ filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
+
+
+ filterSize = FFMIN(filterSize, srcW - 2);
+ filterSize = FFMAX(filterSize, 1);
+
+ // FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc - 0x10000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
+ int j;
+ (*filterPos)[i] = xx;
+ for (j = 0; j < filterSize; j++) {
+ int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
+ double floatd;
+ int64_t coeff;
+
+ if (xInc > 1 << 16)
+ d = d * dstW / srcW;
+ floatd = d * (1.0 / (1 << 30));
+
+ if (flags & SWS_BICUBIC) {
+ int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
+ int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
+
+ if (d >= 1LL << 31) {
+ coeff = 0.0;
+ } else {
+ int64_t dd = (d * d) >> 30;
+ int64_t ddd = (dd * d) >> 30;
+
+ if (d < 1LL << 30)
+ coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
+ (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
+ (6 * (1 << 24) - 2 * B) * (1 << 30);
+ else
+ coeff = (-B - 6 * C) * ddd +
+ (6 * B + 30 * C) * dd +
+ (-12 * B - 48 * C) * d +
+ (8 * B + 24 * C) * (1 << 30);
+ }
+ coeff *= fone >> (30 + 24);
+ }
+#if 0
+ else if (flags & SWS_X) {
+ double p = param ? param * 0.01 : 0.3;
+ coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
+ coeff *= pow(2.0, -p * d * d);
+ }
+#endif
+ else if (flags & SWS_X) {
+ double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
+ double c;
+
+ if (floatd < 1.0)
+ c = cos(floatd * M_PI);
+ else
+ c = -1.0;
+ if (c < 0.0)
+ c = -pow(-c, A);
+ else
+ c = pow(c, A);
+ coeff = (c * 0.5 + 0.5) * fone;
+ } else if (flags & SWS_AREA) {
+ int64_t d2 = d - (1 << 29);
+ if (d2 * xInc < -(1LL << (29 + 16)))
+ coeff = 1.0 * (1LL << (30 + 16));
+ else if (d2 * xInc < (1LL << (29 + 16)))
+ coeff = -d2 * xInc + (1LL << (29 + 16));
+ else
+ coeff = 0.0;
+ coeff *= fone >> (30 + 16);
+ } else if (flags & SWS_GAUSS) {
+ double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+ coeff = (pow(2.0, -p * floatd * floatd)) * fone;
+ } else if (flags & SWS_SINC) {
+ coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
+ } else if (flags & SWS_LANCZOS) {
+ double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+ coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
+ (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
+ if (floatd > p)
+ coeff = 0;
+ } else if (flags & SWS_BILINEAR) {
+ coeff = (1 << 30) - d;
+ if (coeff < 0)
+ coeff = 0;
+ coeff *= fone >> 30;
+ } else if (flags & SWS_SPLINE) {
+ double p = -2.196152422706632;
+ coeff = getScaleSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
+ } else {
+ coeff = 0.0; // GCC warning killer
+ assert(0);
+ }
+
+ filter[i * filterSize + j] = coeff;
+ xx++;
+ }
+ xDstInSrc += 2 * xInc;
+ }
+ }
+
+ assert(filterSize > 0);
+ filter2Size = filterSize;
+ if (srcFilter)
+ filter2Size += srcFilter->length - 1;
+ if (dstFilter)
+ filter2Size += dstFilter->length - 1;
+ assert(filter2Size > 0);
+ //FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
+ filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2));
+ if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0)
+ {
+ printf("Can't alloc memory\n");
+ goto fail;
+ }
+
+ for (i = 0; i < dstW; i++) {
+ int j, k;
+
+ if (srcFilter) {
+ for (k = 0; k < srcFilter->length; k++) {
+ for (j = 0; j < filterSize; j++)
+ filter2[i * filter2Size + k + j] +=
+ srcFilter->coeff[k] * filter[i * filterSize + j];
+ }
+ } else {
+ for (j = 0; j < filterSize; j++)
+ filter2[i * filter2Size + j] = filter[i * filterSize + j];
+ }
+ // FIXME dstFilter
+
+ (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
+ }
+ av_freep(&filter);
+
+ // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
+ minFilterSize = 0;
+ for (i = dstW - 1; i >= 0; i--) {
+ int min = filter2Size;
+ int j;
+ int64_t cutOff = 0.0;
+
+ for (j = 0; j < filter2Size; j++) {
+ int k;
+ cutOff += FFABS(filter2[i * filter2Size]);
+
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
+ break;
+
+ if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
+ break;
+
+ // move filter coefficients left
+ for (k = 1; k < filter2Size; k++)
+ filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
+ filter2[i * filter2Size + k - 1] = 0;
+ (*filterPos)[i]++;
+ }
+
+ cutOff = 0;
+ for (j = filter2Size - 1; j > 0; j--) {
+ cutOff += FFABS(filter2[i * filter2Size + j]);
+
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
+ break;
+ min--;
+ }
+
+ if (min > minFilterSize)
+ minFilterSize = min;
+ }
+
+
+ assert(minFilterSize > 0);
+ filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
+ assert(filterSize > 0);
+ filter = (int64_t *)av_malloc(filterSize * dstW * sizeof(*filter));
+ if (filterSize >= MAX_FILTER_SIZE * 16 /
+ ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
+ goto fail;
+ *outFilterSize = filterSize;
+
+ if (flags & SWS_PRINT_INFO)
+ //av_log(NULL, AV_LOG_VERBOSE,
+ // "SwScaler: reducing / aligning filtersize %d -> %d\n",
+ // filter2Size, filterSize);
+ printf("SwScaler: reducing / aligning filtersize %d -> %d\n",filter2Size,filterSize);
+ for (i = 0; i < dstW; i++) {
+ int j;
+
+ for (j = 0; j < filterSize; j++) {
+ if (j >= filter2Size)
+ filter[i * filterSize + j] = 0;
+ else
+ filter[i * filterSize + j] = filter2[i * filter2Size + j];
+ if ((flags & SWS_BITEXACT) && j >= minFilterSize)
+ filter[i * filterSize + j] = 0;
+ }
+ }
+
+ // FIXME try to align filterPos if possible
+
+ // fix borders
+ for (i = 0; i < dstW; i++) {
+ int j;
+ if ((*filterPos)[i] < 0) {
+ // move filter coefficients left to compensate for filterPos
+ for (j = 1; j < filterSize; j++) {
+ int left = FFMAX(j + (*filterPos)[i], 0);
+ filter[i * filterSize + left] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
+ }
+ (*filterPos)[i]= 0;
+ }
+
+ if ((*filterPos)[i] + filterSize > srcW) {
+ int shift = (*filterPos)[i] + filterSize - srcW;
+ // move filter coefficients right to compensate for filterPos
+ for (j = filterSize - 2; j >= 0; j--) {
+ int right = FFMIN(j + shift, filterSize - 1);
+ filter[i * filterSize + right] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
+ }
+ (*filterPos)[i]= srcW - filterSize;
+ }
+ }
+
+ // Note the +1 is for the MMX scaler which reads over the end
+ // FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
+ // *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
+ *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t));
+ if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0)
+ {
+ printf("Can't alloc memory\n");
+ goto fail;
+ }
+
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int64_t error = 0;
+ int64_t sum = 0;
+
+ for (j = 0; j < filterSize; j++) {
+ sum += filter[i * filterSize + j];
+ }
+ sum = (sum + one / 2) / one;
+ for (j = 0; j < *outFilterSize; j++) {
+ int64_t v = filter[i * filterSize + j] + error;
+ int intV = ROUNDED_DIV(v, sum);
+ (*outFilter)[i * (*outFilterSize) + j] = intV;
+ error = v - intV * sum;
+ }
+ }
+
+ (*filterPos)[dstW + 0] =
+ (*filterPos)[dstW + 1] =
+ (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
+ for (i = 0; i < *outFilterSize; i++) {
+ int k = (dstW - 1) * (*outFilterSize) + i;
+ (*outFilter)[k + 1 * (*outFilterSize)] =
+ (*outFilter)[k + 2 * (*outFilterSize)] =
+ (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
+ }
+
+ ret = 0;
+
+fail:
+ av_free(filter);
+ av_free(filter2);
+ return ret;
+}
+
+static int handle_scale_jpeg(enum PixelFormat *format)
+{
+ switch (*format) {
+ case AV_PIX_FMT_YUVJ420P:
+ *format = AV_PIX_FMT_YUV420P;
+ return 1;
+ case AV_PIX_FMT_YUVJ422P:
+ *format = AV_PIX_FMT_YUV422P;
+ return 1;
+ case AV_PIX_FMT_YUVJ444P:
+ *format = AV_PIX_FMT_YUV444P;
+ return 1;
+ case AV_PIX_FMT_YUVJ440P:
+ *format = AV_PIX_FMT_YUV440P;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void scaleGetSubSampleFactors(int *h, int *v, enum PixelFormat format)
+{
+ *h = av_pix_fmt_descriptors[format].log2_chroma_w;
+ *v = av_pix_fmt_descriptors[format].log2_chroma_h;
+}
+
/* Per-pixel-format capability flags: whether the scaler accepts the
 * format as input and/or can produce it as output. */
typedef struct FormatEntry {
    int is_supported_in, is_supported_out;
} FormatEntry;
+
/* Support table indexed directly by enum PixelFormat value.  Entries
 * are positional, so this list must stay in sync with libavutil's
 * AV_PIX_FMT_* enumeration order for the ffmpeg version this was
 * written against.  NOTE(review): fragile — designated initializers
 * ([AV_PIX_FMT_XXX] = { 1, 1 }) would make the mapping explicit and
 * survive enum reordering; confirm intended ffmpeg version before
 * changing. */
static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 0, 0 },
    { 1, 1 },
    { 0, 1 },
    { 1, 1 },
    { 1, 1 },
    { 0, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 0 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 1 },
    { 1, 1 },
    { 0, 0 },
    { 0, 0 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
};
+
+int scale_isSupportedInput(enum PixelFormat pix_fmt)
+{
+ return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
+ format_entries[pix_fmt].is_supported_in : 0;
+}
+
+int scale_isSupportedOutput(enum PixelFormat pix_fmt)
+{
+ return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
+ format_entries[pix_fmt].is_supported_out : 0;
+}
+
+static void hcscale_fast_c(ScaleContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
+ dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
+}
+
+static void hyscale_fast_c(ScaleContext *c, int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
+}
+
/*
 * Generic horizontal scale: 16-bit input samples, 19-bit output stored
 * in int32_t (the int16_t* dst parameter is reinterpreted).  One FIR
 * dot product per destination pixel.
 */
static void hScale16To19_c(ScaleContext *c, int16_t *_dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh = bits - 4;

    /* RGB/PAL8 sources get a fixed shift instead of a depth-derived one.
     * NOTE(review): assumes the pre-expansion convention of the matching
     * input functions — confirm against ff_sws_init_input_funcs. */
    if((isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
        sh= 9;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        /* clip the accumulated value to the 19-bit range */
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}
+
/*
 * Generic horizontal scale: 16-bit input samples, 15-bit int16_t
 * output.  One FIR dot product per destination pixel.
 */
static void hScale16To15_c(ScaleContext *c, int16_t *dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    /* Sub-16-bit sources: RGB/PAL8 use a fixed shift, everything else a
     * depth-derived one.  NOTE(review): assumes the pre-expansion
     * convention of the matching input functions — confirm against
     * ff_sws_init_input_funcs. */
    if(sh<15)
        sh= isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}
+
+static void hScale8To15_c(ScaleContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+ for (j = 0; j < filterSize; j++) {
+ val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
+ }
+}
+
+static void hScale8To19_c(ScaleContext *c, int16_t *_dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+ for (j = 0; j < filterSize; j++) {
+ val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
+ }
+}
+
/* Convert 15-bit fixed-point chroma rows to full (JPEG) range in
 * place.  Inputs are clamped so the int16_t result cannot overflow. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int u = dstU[n];
        int v = dstV[n];

        if (u > 30775)
            u = 30775;
        if (v > 30775)
            v = 30775;
        dstU[n] = (u * 4663 - 9289992) >> 12; /* -264 */
        dstV[n] = (v * 4663 - 9289992) >> 12; /* -264 */
    }
}
+
/* Convert 15-bit fixed-point chroma rows from full (JPEG) range back
 * to limited range, in place: y = (x*1799 + 4081085) >> 11. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    const int16_t *end = dstU + width;

    while (u < end) {
        *u = (*u * 1799 + 4081085) >> 11; /* 1469 */
        *v = (*v * 1799 + 4081085) >> 11; /* 1469 */
        u++;
        v++;
    }
}
+
/* Convert a 15-bit fixed-point luma row to full (JPEG) range in place.
 * The input is clamped so the int16_t result cannot overflow. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int y = dst[n] < 30189 ? dst[n] : 30189;

        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
+
/* Convert a 15-bit fixed-point luma row from full (JPEG) range back to
 * limited range, in place: y = (x*14071 + 33561947) >> 14. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    const int16_t *end = dst + width;

    for (; p < end; p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
+
/*
 * Convert >10-bit chroma rows (19-bit samples held in int32_t storage,
 * hence the pointer reinterpretation) to full (JPEG) range, in place.
 *
 * Fix: the clamped product could exceed INT_MAX (30775<<4 == 492400 and
 * 492400 * 4663 > 2^31), which is signed-overflow undefined behavior.
 * The multiply is now performed in unsigned arithmetic (well-defined
 * modulo 2^32) and converted back to int before the arithmetic shift,
 * matching the equivalent fix in upstream libswscale.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);

        dstU[i] = (int)(u * 4663U - (9289992 << 4)) >> 12; /* -264 */
        dstV[i] = (int)(v * 4663U - (9289992 << 4)) >> 12; /* -264 */
    }
}
+
/* 19-bit variant of chrRangeFromJpeg_c (samples in int32_t storage):
 * the additive constant is pre-scaled by 16. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        u[n] = (u[n] * 1799 + (4081085 << 4)) >> 11; /* 1469 */
        v[n] = (v[n] * 1799 + (4081085 << 4)) >> 11; /* 1469 */
    }
}
+
/*
 * Convert a >10-bit luma row (19-bit samples held in int32_t storage)
 * to full (JPEG) range, in place.
 *
 * Fix: the clamped product could exceed INT_MAX (30189<<4 == 483024 and
 * 483024 * 4769 > 2^31), which is signed-overflow undefined behavior.
 * The multiply is now performed in unsigned arithmetic (well-defined
 * modulo 2^32) and converted back to int before the arithmetic shift,
 * matching the equivalent fix in upstream libswscale.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        int y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);

        dst[i] = (int)(y * 4769U - (39057361 << 2)) >> 12;
    }
}
+
/* 19-bit variant of lumRangeFromJpeg_c (samples in int32_t storage).
 * Coefficients are the original 14-bit-shift constants divided by 4 so
 * a 12-bit shift can be used: (14071/4) == 3517, (33561947<<4)/4. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * (14071 / 4) + (33561947 << 4) / 4) >> 12;
}
+
/*
 * Select the C scaling/conversion kernels for this context based on
 * source/destination bit depth and sample range.
 */
static av_cold void sws_init_swScale_c(ScaleContext *c)
{
    enum PixelFormat srcFormat = c->srcFormat;

    /* output (vertical) and input (unscaled-conversion) kernels come
     * from the shared init helpers */
    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX);

    ff_sws_init_input_funcs(c);

    /* Horizontal scalers: pick the 8- or 16-bit input flavour; the fast
     * bilinear path is only installed for 8-bit in / <=10-bit out and
     * only when SWS_FAST_BILINEAR was requested. */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            }
        } else {
            c->hyScale = c->hcScale = hScale8To19_c;
        }
    } else {
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
                                                 : hScale16To15_c;
    }

    /* Range conversion kernels (limited <-> full), skipped when the
     * destination is RGB; the *16 variants serve >10-bit pipelines. */
    if (c->srcRange != c->dstRange && !isScaleAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg_c;
                c->chrConvertRange = chrRangeFromJpeg_c;
            } else {
                c->lumConvertRange = lumRangeToJpeg_c;
                c->chrConvertRange = chrRangeToJpeg_c;
            }
        } else {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg16_c;
                c->chrConvertRange = chrRangeFromJpeg16_c;
            } else {
                c->lumConvertRange = lumRangeToJpeg16_c;
                c->chrConvertRange = chrRangeToJpeg16_c;
            }
        }
    }

    /* Gray and 1-bit mono formats skip horizontal chroma scaling. */
    if (!(isScaleGray(srcFormat) || isScaleGray(c->dstFormat) ||
          srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
}
+
/*
 * Derive all scaling parameters (fixed-point increments, chroma plane
 * geometry, and the four FIR filters) for an already-populated
 * ScaleContext.  Returns 0 on success, -1 on failure; on failure the
 * caller is expected to release the context (including any filter
 * arrays that were allocated before the failing step) via
 * sws_freeContext().
 *
 * Large regions below are kept verbatim from the original libswscale
 * initialization but disabled for the OpenCL path.
 */
int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dstFilter)
{
// int i, j;
    ScaleFilter dummyFilter = { NULL, NULL, NULL, NULL };
    int srcW = c->srcW;
    int srcH = c->srcH;
    int dstW = c->dstW;
    int dstH = c->dstH;
// int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
    int flags, cpu_flags;
    enum PixelFormat srcFormat = c->srcFormat;
    enum PixelFormat dstFormat = c->dstFormat;

    cpu_flags = 0;
    flags = c->flags;

    /* NOTE(review): srcFormat/dstFormat were just copied out of c, so
     * this condition can never be true here; kept from the original
     * code where the formats could have been rewritten in between. */
    if(srcFormat != c->srcFormat || dstFormat != c->dstFormat){
        printf("deprecated pixel format used, make sure you did set range correctly\n");
        c->srcFormat = srcFormat;
        c->dstFormat = dstFormat;
    }

/*
    if (!scale_isSupportedInput(srcFormat)) {
        printf("%s is not supported as input pixel format\n",av_get_pix_fmt_name(srcFormat));
        return -1;
    }

    if (!scale_isSupportedOutput(dstFormat)) {
        printf("%s is not supported as output pixel format\n",av_get_pix_fmt_name(dstFormat));
        return -1;
    }
    i = flags & (SWS_POINT |
                 SWS_AREA |
                 SWS_BILINEAR |
                 SWS_FAST_BILINEAR |
                 SWS_BICUBIC |
                 SWS_X |
                 SWS_GAUSS |
                 SWS_LANCZOS |
                 SWS_SINC |
                 SWS_SPLINE |
                 SWS_BICUBLIN);
    if (!i || (i & (i - 1))) {
        printf("Exactly one scaler algorithm must be chosen, got %X\n",i);
        return -1;
    }
*/

    /* Reject dimensions the filter generator cannot handle. */
    if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {
        printf("%dx%d -> %dx%d is invalid scaling dimension\n",srcW,srcH,dstW,dstH);
        return -1;
    }

    if (!dstFilter)
        dstFilter = &dummyFilter;
    if (!srcFilter)
        srcFilter = &dummyFilter;

    /* 16.16 fixed-point source step per destination pixel, rounded. */
    c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
    c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
    c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[dstFormat]);
    c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[srcFormat]);
    c->vRounder = 4 * 0x0001000100010001ULL;

    scaleGetSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
    scaleGetSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

/*
    if (isScaleAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
        if (dstW&1) {
            printf("Forcing full internal H chroma due to odd output size\n");
            flags |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
        }
    }

    if (flags & SWS_FULL_CHR_H_INT &&
        isScaleAnyRGB(dstFormat) &&
        dstFormat != AV_PIX_FMT_RGBA &&
        dstFormat != AV_PIX_FMT_ARGB &&
        dstFormat != AV_PIX_FMT_BGRA &&
        dstFormat != AV_PIX_FMT_ABGR &&
        dstFormat != AV_PIX_FMT_RGB24 &&
        dstFormat != AV_PIX_FMT_BGR24) {
        printf("full chroma interpolation for destination format '%s' not yet implemented\n", av_get_pix_fmt_name(dstFormat));
        flags &= ~SWS_FULL_CHR_H_INT;
        c->flags = flags;
    }

    if (isScaleAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
        c->chrDstHSubSample = 1;
*/

    // drop some chroma lines if the user wants it
    c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> SWS_SRC_V_CHR_DROP_SHIFT;
    c->chrSrcVSubSample += c->vChrDrop;

/*
    if (isScaleAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) &&
        srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 &&
        srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 &&
        srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE &&
        ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || (flags & SWS_FAST_BILINEAR)))
        c->chrSrcHSubSample = 1;
*/

    /* Chroma plane geometry; -((-x) >> s) is a ceiling shift. */
    c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample);
    c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample);
    c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
    c->chrDstH = -((-dstH) >> c->chrDstVSubSample);

/*
    c->srcBpc = 1 + av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1;
    if (c->srcBpc < 8)
        c->srcBpc = 8;
    c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1;
    if (c->dstBpc < 8)
        c->dstBpc = 8;
    if (isScaleAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
        c->srcBpc = 16;
    if (c->dstBpc == 16)
        dst_stride <<= 1;
    c->formatConvBuffer = (uint8_t *)av_mallocz(FFALIGN(srcW*2+78, 16) * 2);
    if( c->formatConvBuffer == NULL && (FFALIGN(srcW*2+78, 16) * 2) != 0)
    {
        printf("Can't alloc memory formatConvBuffer\n");
        goto fail;
    }
*/

    c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
    c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;

/*
    if (flags & SWS_FAST_BILINEAR) {
        if (c->canMMX2BeUsed) {
            c->lumXInc += 20;
            c->chrXInc += 20;
        }
    }
*/

    /* No SIMD coefficient packing on this path, so no alignment need. */
    const int filterAlign = 1;

    /* Build the four FIR filters: horizontal/vertical x luma/chroma.
     * SWS_BICUBLIN means bicubic luma + bilinear chroma. */
    if (initScaleFilter(&c->hLumFilter, &c->hLumFilterPos,
                        &c->hLumFilterSize, c->lumXInc,
                        srcW, dstW, filterAlign, 1 << 14,
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                        cpu_flags, srcFilter->lumH, dstFilter->lumH,
                        c->param) < 0)
        goto fail;

    if (initScaleFilter(&c->hChrFilter, &c->hChrFilterPos,
                        &c->hChrFilterSize, c->chrXInc,
                        c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                        cpu_flags, srcFilter->chrH, dstFilter->chrH,
                        c->param) < 0)
        goto fail;

    if (initScaleFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
                        c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                        cpu_flags, srcFilter->lumV, dstFilter->lumV,
                        c->param) < 0)
        goto fail;

    if (initScaleFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
                        c->chrYInc, c->chrSrcH, c->chrDstH,
                        filterAlign, (1 << 12),
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                        cpu_flags, srcFilter->chrV, dstFilter->chrV,
                        c->param) < 0)
        goto fail;

/*
#if HAVE_ALTIVEC
    FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail);
    FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);

    for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
        int j;
        short *p = (short *)&c->vYCoeffsBank[i];
        for (j = 0; j < 8; j++)
            p[j] = c->vLumFilter[i];
    }

    for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
        int j;
        short *p = (short *)&c->vCCoeffsBank[i];
        for (j = 0; j < 8; j++)
            p[j] = c->vChrFilter[i];
    }
#endif
*/

/*
    // calculate buffer sizes so that they won't run out while handling these damn slices
    c->vLumBufSize = c->vLumFilterSize;
    c->vChrBufSize = c->vChrFilterSize;
    for (i = 0; i < dstH; i++) {
        int chrI = (int64_t)i * c->chrDstH / dstH;
        int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
                              ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
                               << c->chrSrcVSubSample));

        nextSlice >>= c->chrSrcVSubSample;
        nextSlice <<= c->chrSrcVSubSample;
        if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
            c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
        if (c->vChrFilterPos[chrI] + c->vChrBufSize <
            (nextSlice >> c->chrSrcVSubSample))
            c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
                             c->vChrFilterPos[chrI];
    }

    c->lumPixBuf = (int16_t **)av_malloc( c->vLumBufSize * 3 * sizeof(int16_t *));
    if(c->lumPixBuf == NULL && ( c->vLumBufSize * 3 * sizeof(int16_t *)) != 0)
    {
        printf("Can't alloc memory lumPixbuf\n");
        goto fail;
    }

    c->chrUPixBuf = (int16_t **)av_malloc( c->vChrBufSize * 3 * sizeof(int16_t *));
    if(c->chrUPixBuf == NULL && ( c->vChrBufSize * 3 * sizeof(int16_t *)) != 0)
    {
        printf("Can't alloc memory chrUpixbuf\n");
        goto fail;
    }

    c->chrVPixBuf = (int16_t **)av_malloc( c->vChrBufSize * 3 * sizeof(int16_t *));
    if(c->chrVPixBuf == NULL && ( c->vChrBufSize * 3 * sizeof(int16_t *)) != 0)
    {
        printf("Can't alloc memory chrVPixBuf\n");
        goto fail;
    }

    for (i = 0; i < c->vLumBufSize; i++) {
        c->lumPixBuf[i + c->vLumBufSize] = (int16_t *)malloc( dst_stride + 16);
        if(c->lumPixBuf[i + c->vLumBufSize] == NULL && ( dst_stride + 16) != 0)
        {
            printf("Can't alloc memory lumPixBuf[]\n");
            goto fail;
        }
        c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
    }

    c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
    c->uv_offx2 = dst_stride + 16;
    for (i = 0; i < c->vChrBufSize; i++) {
        c->chrUPixBuf[i + c->vChrBufSize] = (int16_t *)av_malloc( dst_stride * 2 + 32);
        if(c->chrUPixBuf[i + c->vChrBufSize] == NULL && ( dst_stride * 2 + 32) != 0)
        {
            printf("Can't alloc memory chrUPixBuf[]\n");
            goto fail;
        }
        c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
        c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
                         = c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
    }

    // try to avoid drawing green stuff between the right end and the stride end
    for (i = 0; i < c->vChrBufSize; i++)
        if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
            assert(c->dstBpc > 10);
            for(j=0; j<dst_stride/2+1; j++)
                ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
        } else
            for(j=0; j<dst_stride+1; j++)
                ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;

    assert(c->chrDstH <= dstH);
*/
    if (flags & SWS_PRINT_INFO) {
        /* NOTE(review): placeholder left behind when the verbose report
         * was removed; consider a real message or dropping the branch. */
        printf("I have delete it, Haha\n");
    }

// sws_init_swScale_c(c);
    return 0;
fail:
    /* Filters allocated above are released by the caller. */
    return -1;
}
+
+ScaleContext *scale_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
+ int dstW, int dstH, enum PixelFormat dstFormat,
+ int flags, ScaleFilter *srcFilter,
+ ScaleFilter *dstFilter, const double *param)
+{
+ ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext));
+ sc->flags = flags;
+ sc->srcW = srcW;
+ sc->srcH = srcH;
+ sc->dstW = dstW;
+ sc->dstH = dstH;
+ sc->srcRange = handle_scale_jpeg(&srcFormat);
+ sc->dstRange = handle_scale_jpeg(&dstFormat);
+ sc->srcFormat = srcFormat;
+ sc->dstFormat = dstFormat;
+ sc->hyscale_fast = 0;
+ sc->hcscale_fast = 0;
+
+ if (param) {
+ sc->param[0] = param[0];
+ sc->param[1] = param[1];
+ }
+
+ if (scale_init_context(sc, srcFilter, dstFilter) < 0) {
+ sws_freeContext(sc);
+ return NULL;
+ }
+
+ return sc;
+}
+
+int scale_opencl(ScaleContext *c,
+ void *cl_inbuf,
+ void *cl_outbuf,
+ int *srcStride,
+ int *dstStride)
+{
+ int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
+
+ av_scale_frame(c,cl_outbuf,cl_inbuf,srcStride,dstStride,&should_dither);
+
+ return 1;
+}
+
+void scale_init( int width, int height, int dstwidth, int dstheight )
+{
+ int srcW = width;
+ int srcH = height;
+ int dstW = dstwidth;
+ int dstH = dstheight;
+ enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P;
+ enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P;
+ int flags = SWS_BILINEAR;
+
+ g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL);
+}
+
+void scale_release()
+{
+ sws_freeContext( g_scale );
+}
#ifdef USE_OPENCL
/* Scale one frame already resident in OpenCL buffers using the
 * module-global context.  Returns the scale_opencl() result. */
int scale_run( cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height )
{
    int src_stride[4];
    int dst_stride[4];

    g_scale->cl_src = inbuf;
    g_scale->cl_dst = outbuf;

    src_stride[0] = linesizey;
    src_stride[1] = src_stride[2] = linesizeuv;
    src_stride[3] = 0;

    dst_stride[0] = g_scale->dstW;
    dst_stride[1] = dst_stride[2] = g_scale->chrDstW;
    dst_stride[3] = 0;

    return scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride );
}
#endif
+#endif
diff --git a/libhb/scale.h b/libhb/scale.h new file mode 100644 index 000000000..a28ccdf38 --- /dev/null +++ b/libhb/scale.h @@ -0,0 +1,310 @@ +#ifndef SCALE_H
+#define SCALE_H
+#ifdef USE_OPENCL
+#include <stdint.h>
+#include "vadxva2.h"
+#include "libavutil/pixfmt.h"
+#include "hbffmpeg.h"
+
+#define YUVRGB_TABLE_HEADROOM 128
+#define MAX_FILTER_SIZE 256
+#define is16BPS(x) \
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15)
+
+#define is9_OR_10BPS(x) \
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \
+ av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9)
+
+#if ARCH_X86_64
+# define APCK_PTR2 8
+# define APCK_COEF 16
+# define APCK_SIZE 24
+#else
+# define APCK_PTR2 4
+# define APCK_COEF 8
+# define APCK_SIZE 16
+#endif
+
/*
 * Kernel function-pointer types stored in ScaleContext.
 *
 * Fix: yuv2packed2_fn and yuv2packedX_fn misspelled the context tag as
 * "struct SCaleContext", which silently declared a *different*
 * incomplete struct type and made those pointer types incompatible with
 * the real ScaleContext used everywhere else.
 */

/* Write one output line from a single vertically-filtered source line
 * (planar output). */
typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
                               const uint8_t *dither, int offset);

/* Write one output line from a vertical filter over several source
 * lines (planar output). */
typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
                               const int16_t **src, uint8_t *dest, int dstW,
                               const uint8_t *dither, int offset);

/* Write one interleaved chroma output line (e.g. NV12-style). */
typedef void (*yuv2interleavedX_fn)(struct ScaleContext *c,
                                    const int16_t *chrFilter,
                                    int chrFilterSize,
                                    const int16_t **chrUSrc,
                                    const int16_t **chrVSrc,
                                    uint8_t *dest, int dstW);

/* Packed output from one source line per plane. */
typedef void (*yuv2packed1_fn)(struct ScaleContext *c, const int16_t *lumSrc,
                               const int16_t *chrUSrc[2],
                               const int16_t *chrVSrc[2],
                               const int16_t *alpSrc, uint8_t *dest,
                               int dstW, int uvalpha, int y);

/* Packed output blended from two source lines per plane. */
typedef void (*yuv2packed2_fn)(struct ScaleContext *c, const int16_t *lumSrc[2],
                               const int16_t *chrUSrc[2],
                               const int16_t *chrVSrc[2],
                               const int16_t *alpSrc[2],
                               uint8_t *dest,
                               int dstW, int yalpha, int uvalpha, int y);

/* Packed output from a full vertical filter over several source lines. */
typedef void (*yuv2packedX_fn)(struct ScaleContext *c, const int16_t *lumFilter,
                               const int16_t **lumSrc, int lumFilterSize,
                               const int16_t *chrFilter,
                               const int16_t **chrUSrc,
                               const int16_t **chrVSrc, int chrFilterSize,
                               const int16_t **alpSrc, uint8_t *dest,
                               int dstW, int y);

/* Top-level slice-scaling entry point. */
typedef int (*SwsFunc)(struct ScaleContext *context, const uint8_t *src[],
                       int srcStride[], int srcSliceY, int srcSliceH,
                       uint8_t *dst[], int dstStride[]);
+
/* A 1-D convolution filter expressed as a coefficient vector. */
typedef struct {
    double *coeff; ///< pointer to the list of coefficients
    int length; ///< number of coefficients in the vector
} ScaleVector;
+
/* Optional user-supplied pre-filters: separate horizontal/vertical
 * vectors for the luma and chroma planes (any member may be NULL). */
typedef struct {
    ScaleVector *lumH; ///< horizontal luma filter
    ScaleVector *lumV; ///< vertical luma filter
    ScaleVector *chrH; ///< horizontal chroma filter
    ScaleVector *chrV; ///< vertical chroma filter
} ScaleFilter;
+
+typedef struct ScaleContext {
+ SwsFunc swScale;
+ int srcW; ///< Width of source luma/alpha planes.
+ int srcH; ///< Height of source luma/alpha planes.
+ int dstH; ///< Height of destination luma/alpha planes.
+ int chrSrcW; ///< Width of source chroma planes.
+ int chrSrcH; ///< Height of source chroma planes.
+ int chrDstW; ///< Width of destination chroma planes.
+ int chrDstH; ///< Height of destination chroma planes.
+ int lumXInc, chrXInc;
+ int lumYInc, chrYInc;
+ enum PixelFormat dstFormat; ///< Destination pixel format.
+ enum PixelFormat srcFormat; ///< Source pixel format.
+ int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
+ int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
+ int dstBpc, srcBpc;
+ int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
+ int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
+ int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
+ int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
+ int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
+ int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
+ double param[2]; ///< Input parameters for scaling algorithms that need them.
+
+ uint32_t pal_yuv[256];
+ uint32_t pal_rgb[256];
+
+ int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
+ int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+ int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+ int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
+ int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
+ int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
+ int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
+ int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
+ int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
+ int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
+
+ uint8_t *formatConvBuffer;
+ int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
+ int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
+ int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
+ int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
+ int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
+ int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
+ int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
+ int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
+ int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
+ int vChrFilterSize; ///< Vertical filter size for chroma pixels.
+
+ int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
+ int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
+ uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
+ uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
+
+ int canMMX2BeUsed;
+
+ unsigned char *dest;
+ unsigned char *source;
+
+ int dstY; ///< Last destination vertical line output from last slice.
+ int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
+ void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
+ uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+
+ //Colorspace stuff
+ int contrast, brightness, saturation; // for sws_getColorspaceDetails
+ int srcColorspaceTable[4];
+ int dstColorspaceTable[4];
+ int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
+ int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
+ int src0Alpha;
+ int dst0Alpha;
+ int yuv2rgb_y_offset;
+ int yuv2rgb_y_coeff;
+ int yuv2rgb_v2r_coeff;
+ int yuv2rgb_v2g_coeff;
+ int yuv2rgb_u2g_coeff;
+ int yuv2rgb_u2b_coeff;
+
+#define RED_DITHER "0*8"
+#define GREEN_DITHER "1*8"
+#define BLUE_DITHER "2*8"
+#define Y_COEFF "3*8"
+#define VR_COEFF "4*8"
+#define UB_COEFF "5*8"
+#define VG_COEFF "6*8"
+#define UG_COEFF "7*8"
+#define Y_OFFSET "8*8"
+#define U_OFFSET "9*8"
+#define V_OFFSET "10*8"
+#define LUM_MMX_FILTER_OFFSET "11*8"
+#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
+#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
+#define ESP_OFFSET "11*8+4*4*256*2+8"
+#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
+#define U_TEMP "11*8+4*4*256*2+24"
+#define V_TEMP "11*8+4*4*256*2+32"
+#define Y_TEMP "11*8+4*4*256*2+40"
+#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
+#define UV_OFF_PX "11*8+4*4*256*3+48"
+#define UV_OFF_BYTE "11*8+4*4*256*3+56"
+#define DITHER16 "11*8+4*4*256*3+64"
+#define DITHER32 "11*8+4*4*256*3+80"
+
+ DECLARE_ALIGNED(8, uint64_t, redDither);
+ DECLARE_ALIGNED(8, uint64_t, greenDither);
+ DECLARE_ALIGNED(8, uint64_t, blueDither);
+
+ DECLARE_ALIGNED(8, uint64_t, yCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vrCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ubCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vgCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ugCoeff);
+ DECLARE_ALIGNED(8, uint64_t, yOffset);
+ DECLARE_ALIGNED(8, uint64_t, uOffset);
+ DECLARE_ALIGNED(8, uint64_t, vOffset);
+ int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
+ int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
+ int dstW; ///< Width of destination luma/alpha planes.
+ DECLARE_ALIGNED(8, uint64_t, esp);
+ DECLARE_ALIGNED(8, uint64_t, vRounder);
+ DECLARE_ALIGNED(8, uint64_t, u_temp);
+ DECLARE_ALIGNED(8, uint64_t, v_temp);
+ DECLARE_ALIGNED(8, uint64_t, y_temp);
+ int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
+
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
+ DECLARE_ALIGNED(8, uint16_t, dither16)[8];
+ DECLARE_ALIGNED(8, uint32_t, dither32)[8];
+
+ const uint8_t *chrDither8, *lumDither8;
+
+#if HAVE_ALTIVEC
+ vector signed short CY;
+ vector signed short CRV;
+ vector signed short CBU;
+ vector signed short CGU;
+ vector signed short CGV;
+ vector signed short OY;
+ vector unsigned short CSHIFT;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
+#endif
+
+#if ARCH_BFIN
+ DECLARE_ALIGNED(4, uint32_t, oy);
+ DECLARE_ALIGNED(4, uint32_t, oc);
+ DECLARE_ALIGNED(4, uint32_t, zero);
+ DECLARE_ALIGNED(4, uint32_t, cy);
+ DECLARE_ALIGNED(4, uint32_t, crv);
+ DECLARE_ALIGNED(4, uint32_t, rmask);
+ DECLARE_ALIGNED(4, uint32_t, cbu);
+ DECLARE_ALIGNED(4, uint32_t, bmask);
+ DECLARE_ALIGNED(4, uint32_t, cgu);
+ DECLARE_ALIGNED(4, uint32_t, cgv);
+ DECLARE_ALIGNED(4, uint32_t, gmask);
+#endif
+
+#if HAVE_VIS
+ DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
+#endif
+ int use_mmx_vfilter;
+
+ /* function pointers for swScale() */
+ yuv2planar1_fn yuv2plane1;
+ yuv2planarX_fn yuv2planeX;
+ yuv2interleavedX_fn yuv2nv12cX;
+ yuv2packed1_fn yuv2packed1;
+ yuv2packed2_fn yuv2packed2;
+ yuv2packedX_fn yuv2packedX;
+
+ /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
+ void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+ /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+ void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+ /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+ void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+
+ void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
+ void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
+ int width);
+
+ void (*hyscale_fast)(struct SwsContext *c,
+ int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc);
+ void (*hcscale_fast)(struct SwsContext *c,
+ int16_t *dst1, int16_t *dst2, int dstWidth,
+ const uint8_t *src1, const uint8_t *src2,
+ int srcW, int xInc);
+
+ void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+ void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+
+ void (*lumConvertRange)(int16_t *dst, int width);
+ void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
+
+ int needs_hcscale; ///< Set if there are chroma planes to be converted.
+
+ cl_mem cl_hLumFilter;
+ cl_mem cl_hLumFilterPos;
+ cl_mem cl_hChrFilter;
+ cl_mem cl_hChrFilterPos;
+ cl_mem cl_vLumFilter;
+ cl_mem cl_vLumFilterPos;
+ cl_mem cl_vChrFilter;
+ cl_mem cl_vChrFilterPos;
+
+ cl_mem cl_intermediaBuf;
+
+ cl_mem cl_src;
+ cl_mem cl_dst;
+} ScaleContext;
+
+/* Initialize the OpenCL scaler for the given source/destination dimensions. */
+void scale_init(int, int, int, int);
+/* Release the scaler's OpenCL resources.
+ * Fixed: '()' declares an unprototyped function in C; '(void)' is the
+ * correct "takes no arguments" prototype. */
+void scale_release(void);
+/* Scale one frame from inbuf to outbuf; returns nonzero on success. */
+int scale_run(cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height);
+#endif
+#endif
diff --git a/libhb/scale_kernel.c b/libhb/scale_kernel.c new file mode 100644 index 000000000..4d0b285c3 --- /dev/null +++ b/libhb/scale_kernel.c @@ -0,0 +1,215 @@ +
+#ifdef USE_OPENCL
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <windows.h>
+#include <time.h>
+#include "scale.h"
+#include "openclwrapper.h"
+
+/*
+ * OCLCHECK: invoke an OpenCL API call, storing its result in the local
+ * 'status' variable of the enclosing function; on failure it prints the
+ * call name and error code, asserts, and returns 'status' FROM THE
+ * ENCLOSING FUNCTION.
+ * NOTE(review): expands to two statements without a do{}while(0)
+ * wrapper, so it is unsafe inside an unbraced if/else; callers must
+ * declare 'cl_int status' and must treat a non-CL_SUCCESS return of the
+ * enclosing function as failure.
+ */
+#define OCLCHECK( method, ...) \
+ status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \
+ printf(" error %s %d\n",# method, status); assert(0); return status; }
+
+/*
+ * CREATEBUF: create an OpenCL buffer in kenv->context and store the
+ * handle in 'out'; on failure prints a message and returns -1 from the
+ * enclosing function.
+ * NOTE(review): this -1 failure return is inconsistent with
+ * CreateCLBuffer()'s success return of 1 -- a caller testing '!st'
+ * will not see -1 as failure (see av_scale_frame_func).
+ */
+#define CREATEBUF( out, flags, size, ptr)\
+ out = clCreateBuffer( kenv->context, (flags), (size), ptr, &status );\
+ if( status != CL_SUCCESS ) { printf( "clCreateBuffer faild %d\n", status ); return -1; }
+
+/* Size of the userdata pointer array handed to hb_run_kernel(). */
+ #define CL_PARAM_NUM 20
+
+/****************************************************************************************************************************/
+/*************************Combine the hscale and yuv2plane into scaling******************************************************/
+/****************************************************************************************************************************/
+/*
+ * CreateCLBuffer: upload the horizontal/vertical scaler filter
+ * coefficient and position tables from the ScaleContext into read-only
+ * OpenCL buffers (copied from host memory at creation time).
+ *
+ * Returns 1 on success.  NOTE(review): on any allocation failure the
+ * CREATEBUF macro returns -1 directly from this function, so callers
+ * must check 'st != 1', not '!st'.
+ */
+static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv )
+{
+ cl_int status;
+
+ /* Horizontal filter tables are only needed when the fast (unfiltered)
+  * horizontal scalers are not in use. */
+ if(!c->hyscale_fast || !c->hcscale_fast)
+ {
+ CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter);
+ CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos);
+ CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter);
+ CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos);
+ }
+ /* Vertical coefficient tables are only used by the multi-tap
+  * ("all") vertical kernel; position tables are always needed. */
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ {
+ CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter);
+ CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter);
+ }
+ CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos);
+ CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos);
+
+ return 1;
+}
+
+/*
+ * av_scale_frame_func: kernel-wrapper callback run by hb_run_kernel().
+ * Performs the horizontal scale pass into an intermediate buffer, then
+ * the vertical scale pass into the destination buffer.
+ *
+ * userdata layout (packed by av_scale_frame):
+ *   [0] ScaleContext*, [1] dst cl_mem, [2] src cl_mem,
+ *   [3] src strides (int[2]: luma, chroma), [4] dst strides (same),
+ *   [5] should_dither (currently unused here).
+ *
+ * Returns 1 on success, -1 or a CL error code on failure.
+ */
+int av_scale_frame_func( void **userdata, KernelEnv *kenv )
+{
+ ScaleContext *c = (ScaleContext *)userdata[0];
+
+ c->cl_src = (cl_mem)userdata[2];
+ c->cl_dst = (cl_mem)userdata[1];
+
+ /* frame size */
+ int *tmp = (int *)userdata[3];
+ int srcStride = tmp[0];
+ int srcChrStride = tmp[1];
+ int srcW = c->srcW;
+ int srcH = c->srcH;
+
+ tmp = (int *)userdata[4];
+ int dstStride = tmp[0];
+ int dstChrStride = tmp[1];
+ int dstW = c->dstW;
+ int dstH = c->dstH;
+
+ /* local variable */
+ cl_int status;
+ size_t global_work_size[2];
+
+ int intermediaSize;
+
+ /* BUG FIX: CreateCLBuffer returns 1 on success, but on failure the
+  * CREATEBUF macro inside it returns -1, which the old 'if( !st )'
+  * test treated as success.  Treat anything other than 1 as failure. */
+ int st = CreateCLBuffer(c,kenv);
+ if( st != 1 )
+ {
+ printf( "CreateBuffer[%s] failed %d\n", "scale_opencl",st );
+ return -1;
+ }
+
+ /* Intermediate surface: horizontally-scaled planes at destination
+  * width but source height, stored as 16-bit samples. */
+ intermediaSize = dstStride * srcH + dstChrStride * srcH;
+
+ /* NOTE(review): this buffer is released at the end of this function,
+  * but any OCLCHECK failure below returns early and leaks it. */
+ CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL);
+
+ /* NOTE(review): kernels and their constant arguments are cached in
+  * function-local statics -- not thread-safe, and wrong if a second
+  * ScaleContext with different filter sizes ever uses this path. */
+ static int init_chr_status = 0;
+ static cl_kernel chr_kernel;
+
+ if(init_chr_status == 0){
+
+ if(!(c->flags & 1))
+ {
+ chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );
+ /* Constant kernel arguments: filter coefficient/position tables. */
+ OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter);
+ OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos);
+ OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize);
+ OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter);
+ OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos);
+ OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize);
+ }
+
+ /* NOTE(review): when (c->flags & 1) is set, chr_kernel is never
+  * created above, yet the argument setup below still runs on an
+  * uninitialized handle -- TODO confirm the fast-hscale path. */
+ /* Set the dimension/stride arguments. */
+ OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride);
+ init_chr_status = 1;
+ }
+
+ kenv->kernel = chr_kernel;
+ /* Per-frame arguments: output (intermediate) and input buffers. */
+ OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);
+ OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src);
+ /* Run the horizontal kernel over the chroma-sized grid. */
+ global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;
+ global_work_size[1] = c->chrSrcH;
+
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+
+ static int init_lum_status = 0;
+ static cl_kernel lum_kernel;
+
+ if( init_lum_status == 0 ){
+ /* Vertical pass: pick the multi-tap or the fast (1-tap) kernel. */
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );
+ else
+ lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );
+
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ {
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter);
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize);
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter);
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize);
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH);
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride);
+ }else{
+
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH);
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride);
+ }
+ init_lum_status = 1;
+ }
+
+ kenv->kernel = lum_kernel;
+ /* Per-frame arguments: final destination and intermediate source. */
+ OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst);
+ OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);
+
+ /* Run the vertical kernel over the destination chroma grid. */
+ global_work_size[0] = c->chrDstW;
+ global_work_size[1] = c->chrDstH;
+
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
+
+ clReleaseMemObject( c->cl_intermediaBuf );
+
+ return 1;
+}
+
+/*
+ * av_scale_frame: entry point for the OpenCL frame scaler.  Registers
+ * the scale kernel wrapper on first use, then invokes it through
+ * hb_run_kernel() with the frame buffers and stride information packed
+ * into a userdata array (see av_scale_frame_func for the layout).
+ *
+ * dst/src are cl_mem handles passed as void*; srcStride/dstStride point
+ * to {luma, chroma} stride pairs.  should_dither is forwarded in
+ * userdata[5] but is not consumed by the kernel wrapper.
+ *
+ * Fixed: log messages said "faild" and the run-failure message named
+ * "scale_opencl_func", which does not match the registered kernel name
+ * "scale_opencl".
+ */
+void av_scale_frame(ScaleContext *c,
+ void *dst,
+ void *src,
+ int *srcStride,
+ int *dstStride,
+ int *should_dither)
+{
+
+ /* One-time registration flag.  NOTE(review): not thread-safe. */
+ static int regflg = 0;
+ void *userdata[CL_PARAM_NUM];
+ userdata[0] = (void *)c;
+ userdata[1] = (void *)dst;
+ userdata[2] = (void *)src;
+ userdata[3] = (void *)srcStride;
+ userdata[4] = (void *)dstStride;
+ userdata[5] = (void *)should_dither;
+
+ if( regflg==0 )
+ {
+ int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func);
+ if( !st )
+ {
+ printf( "register kernel[%s] failed %d\n", "scale_opencl",st );
+ return;
+ }
+ regflg++;
+ }
+
+ if( !hb_run_kernel( "scale_opencl", userdata ))
+ {
+ printf("run kernel function[%s] failed\n", "scale_opencl" );
+ return;
+ }
+}
+
+#endif
diff --git a/libhb/scale_kernel.h b/libhb/scale_kernel.h new file mode 100644 index 000000000..29562bdaf --- /dev/null +++ b/libhb/scale_kernel.h @@ -0,0 +1,6 @@ +#ifndef _H_SCALE_KERNEL_H +#define _H_SCALE_KERNEL_H +#ifdef USE_OPENCL +void av_scale_frame(ScaleContext *c, void *dst, void *src, int *srcStride, int *dstStride, int *should_dither); +#endif +#endif diff --git a/libhb/stream.c b/libhb/stream.c index 46b4cb71e..0790394a8 100644 --- a/libhb/stream.c +++ b/libhb/stream.c @@ -610,7 +610,7 @@ static int hb_stream_get_type(hb_stream_t *stream) if ( fread(buf, 1, sizeof(buf), stream->file_handle) == sizeof(buf) ) { -#ifdef USE_OPENCL +#ifdef USE_HWD if ( hb_get_gui_info(&hb_gui, 1) || (hb_get_gui_info(&hb_gui, 3) == 0) ) return 0; #endif @@ -1101,23 +1101,26 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title) { hb_log( "transport stream missing PCRs - using video DTS instead" ); } -#ifdef USE_OPENCL +#ifdef USE_HWD hb_va_dxva2_t * dxva2 = NULL; dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); if (dxva2) { - title->uvd_support = 1; + title->hwd_support = 1; hb_va_close(dxva2); dxva2 = NULL; } else - title->uvd_support = 0; + title->hwd_support = 0; +#else + title->hwd_support = 0; +#endif +#ifdef USE_OPENCL if (TestGPU() == 0) title->opencl_support = 1; else title->opencl_support = 0; #else - title->uvd_support = 0; title->opencl_support = 0; #endif // Height, width, rate and aspect ratio information is filled in @@ -5671,24 +5674,26 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title ) chapter->seconds = title->seconds; hb_list_add( title->list_chapter, chapter ); } - -#ifdef USE_OPENCL +#ifdef USE_HWD hb_va_dxva2_t * dxva2 = NULL; dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); if (dxva2) { - title->uvd_support = 1; + title->hwd_support = 1; hb_va_close(dxva2); dxva2 = NULL; } else - title->uvd_support = 0; + title->hwd_support = 0; +#else + title->hwd_support = 0; +#endif 
+#ifdef USE_OPENCL if (TestGPU() == 0) title->opencl_support = 1; else title->opencl_support = 0; #else - title->uvd_support = 0; title->opencl_support = 0; #endif diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c index ff6940497..548dfe144 100644 --- a/libhb/vadxva2.c +++ b/libhb/vadxva2.c @@ -10,31 +10,18 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ - #include "vadxva2.h" + +#ifdef USE_OPENCL #include "CL/cl.h" #include "oclnv12toyuv.h" +#include "scale.h" -static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); -static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); -static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ); -static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 ); -static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ); -static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ); -static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ); -static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ); -static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ); -static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ); -static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ); -static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ); -static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ); -static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ); - -#ifdef USE_OPENCL int TestGPU() { int status = 1; - unsigned int i; + unsigned int i, j; + cl_device_id device; cl_uint numPlatforms = 0; status = clGetPlatformIDs(0,NULL,&numPlatforms); if(status != 0) @@ -59,24 +46,55 @@ int TestGPU() sizeof (pbuff), pbuff, NULL); - if (status) - continue; - status = clGetDeviceIDs(platforms[i], + if (status) + continue; + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU , 0 , NULL , &numDevices); - if (status != CL_SUCCESS) - continue; - if(numDevices) - 
break; + + cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); + for (j = 0; j < numDevices; j++) + { + char dbuff[100]; + status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); + device = devices[j]; + if(!strcmp(dbuff, "Advanced Micro Devices, Inc.")) + { + return 0; + } + } + + if (status != CL_SUCCESS) + continue; + if( numDevices) + break; } free(platforms); } end: - return status; + return -1; } #endif + +#ifdef USE_HWD + +static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); +static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); +static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 ); +static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ); +static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ); +static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ); +static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ); +static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ); +static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ); +static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ); +static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ); +static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ); /** * It destroys a Direct3D device manager */ @@ -640,6 +658,35 @@ static void hb_copy_from_nv12( uint8_t *dst, uint8_t *src[2], size_t src_pitch[2 } } } + +#ifdef USE_OPENCL +void hb_init_filter( cl_mem src, int srcwidth, int srcheight, uint8_t* dst, int dstwidth, int dstheight, int *crop ) +{ + T_FilterLink fl = {0}; + int STEP = srcwidth * srcheight * 3 / 2; + int OUTSTEP = dstwidth * dstheight * 3 / 2; + 
int HEIGHT = srcheight; + int LINESIZEY = srcwidth; + int LINESIZEUV = srcwidth / 2; + + cl_mem cl_outbuf; + + if( !hb_create_buffer( &(cl_outbuf), CL_MEM_WRITE_ONLY, STEP ) ) + { + hb_log("av_create_buffer cl_outbuf Error\n"); + return; + } + + fl.cl_outbuf = cl_outbuf; + + scale_run( src, fl.cl_outbuf, LINESIZEY, LINESIZEUV, HEIGHT ); + + hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP ); + CL_FREE( cl_outbuf ); + + return; +} +#endif /** * lock frame data form surface. * nv12 to yuv with opencl and with C reference @@ -671,20 +718,16 @@ int hb_va_extract( hb_va_dxva2_t *dxva2, uint8_t *dst, AVFrame *frame, int job_w #ifdef USE_OPENCL if( ( dxva2->width > job_w || dxva2->height > job_h ) && (TestGPU() == 0) && (hb_get_gui_info(&hb_gui, 2) == 1)) { -/* int i; - uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 ); - for( i = 0; i < dxva2->height; i++ ) - { - memcpy( tmp+i*dxva2->width, plane[0]+i*lock.Pitch, dxva2->width ); - if( i<dxva2->height>>1 ) - memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, plane[1]+i*lock.Pitch, dxva2->width ); - } -*/ hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 ); - //hb_ocl_nv12toyuv( tmp, dxva2->width, dxva2->height, crop, dxva2 ); - hb_ocl_scale( dxva2->cl_mem_yuv, NULL, dst, dxva2->width - ( crop[2] + crop[3] ), dxva2->height - ( crop[0] + crop[1] ), job_w, job_h, os ); - //free( tmp ); - } + + static int init_flag = 0; + if(init_flag == 0){ + scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h ); + init_flag = 1; + } + + hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop ); + } else #endif { @@ -785,20 +828,20 @@ void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ) enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt ) { int i; - static const char *ppsz_name[PIX_FMT_NB] = - { - 
[PIX_FMT_VDPAU_H264] = "PIX_FMT_VDPAU_H264", - [PIX_FMT_VAAPI_IDCT] = "PIX_FMT_VAAPI_IDCT", - [PIX_FMT_VAAPI_VLD] = "PIX_FMT_VAAPI_VLD", - [PIX_FMT_VAAPI_MOCO] = "PIX_FMT_VAAPI_MOCO", - [PIX_FMT_DXVA2_VLD] = "PIX_FMT_DXVA2_VLD", - [PIX_FMT_YUYV422] = "PIX_FMT_YUYV422", - [PIX_FMT_YUV420P] = "PIX_FMT_YUV420P", + static const char *ppsz_name[AV_PIX_FMT_NB] = + { + [AV_PIX_FMT_VDPAU_H264] = "AV_PIX_FMT_VDPAU_H264", + [AV_PIX_FMT_VAAPI_IDCT] = "AV_PIX_FMT_VAAPI_IDCT", + [AV_PIX_FMT_VAAPI_VLD] = "AV_PIX_FMT_VAAPI_VLD", + [AV_PIX_FMT_VAAPI_MOCO] = "AV_PIX_FMT_VAAPI_MOCO", + [AV_PIX_FMT_DXVA2_VLD] = "AV_PIX_FMT_DXVA2_VLD", + [AV_PIX_FMT_YUYV422] = "AV_PIX_FMT_YUYV422", + [AV_PIX_FMT_YUV420P] = "AV_PIX_FMT_YUV420P", }; - for( i = 0; pi_fmt[i] != PIX_FMT_NONE; i++ ) + for( i = 0; pi_fmt[i] != AV_PIX_FMT_NONE; i++ ) { hb_log( "dxva2:Available decoder output format %d (%s)", pi_fmt[i], ppsz_name[pi_fmt[i]] ? : "Unknown" ); - if( pi_fmt[i] == PIX_FMT_DXVA2_VLD ) + if( pi_fmt[i] == AV_PIX_FMT_DXVA2_VLD ) { return pi_fmt[i]; } @@ -825,3 +868,4 @@ int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFram return HB_WORK_OK; } +#endif diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h index 7a2af4862..eeab5bec8 100644 --- a/libhb/vadxva2.h +++ b/libhb/vadxva2.h @@ -16,6 +16,7 @@ #ifndef VA_DXVA2_H #define VA_DXVA2_H +#ifdef USE_HWD #include "hbffmpeg.h" #include "d3d9.h" #include "libavcodec/dxva2.h" @@ -143,6 +144,25 @@ typedef struct #endif } hb_va_dxva2_t; +typedef struct FilterLink_T +{ +#ifdef USE_OPENCL + cl_mem cl_inbuf; + cl_mem cl_outbuf; +#endif + uint8_t *mem_inbuf; + uint8_t *mem_outbuf; + int width; + int height; + int linesizeY; + int linesizeUV; + int inmemdataflag; + int outmemdataflag; + int incldataflag; + int outcldataflag; + int framenum; + int outputSize; +} T_FilterLink; static const hb_d3d_format_t d3d_formats[] = { @@ -192,3 +212,4 @@ void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ); void hb_va_close( hb_va_dxva2_t *dxva2 ); 
#endif +#endif diff --git a/libhb/work.c b/libhb/work.c index c8057f0e4..6625d5ff7 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -554,18 +554,17 @@ static void do_job( hb_job_t * job ) job->list_work = hb_list_init(); hb_log( "starting job" ); -#ifdef USE_OPENCL - if ( job->use_opencl || job->use_uvd) + if ( job->use_opencl || job->use_hwd) { - /* init opencl environment */ hb_log( "Using GPU : Yes.\n" ); - job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I."); + /* init opencl environment */ +#ifdef USE_OPENCL + if ( job->use_opencl ) + job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I."); +#endif } else hb_log( "Using GPU : NO.\n" ); -#else - hb_log( "Using GPU : NO.\n" ); -#endif /* Look for the scanned subtitle in the existing subtitle list * select_subtitle implies that we did a scan. */ if( !job->indepth_scan && interjob->select_subtitle ) @@ -1021,10 +1020,11 @@ static void do_job( hb_job_t * job ) title->video_codec_param = AV_CODEC_ID_MPEG2VIDEO; } #endif -#ifdef USE_OPENCL - if ( /*job->use_opencl &&*/ hb_use_dxva( title ) && (TestGPU() == 0) && job->use_uvd ) - { - vcodec = WORK_DECAVCODECVACCL; +#ifdef USE_HWD + if ( /*job->use_opencl &&*/ hb_use_dxva( title ) && job->use_hwd ) + { + //vcodec = WORK_DECAVCODECVACCL; + job->use_hw_decode = 1; } #endif hb_list_add( job->list_work, ( w = hb_get_work( vcodec ) ) ); diff --git a/make/configure.py b/make/configure.py index 598ef754b..0fc32de0f 100644 --- a/make/configure.py +++ b/make/configure.py @@ -1087,6 +1087,8 @@ def createCLI(): grp.add_option( '--enable-ff-mpeg2', default=False, action='store_true', help=h ) h = IfHost( 'enable OpenCL features', '*-*-*', none=optparse.SUPPRESS_HELP ).value grp.add_option( '--enable-opencl', default=False, action='store_true', help=h ) + h = IfHost( 'enable HWD features', '*-*-*', none=optparse.SUPPRESS_HELP ).value + grp.add_option( '--enable-hwd', default=False, action='store_true', help=h ) cli.add_option_group( grp ) @@ -1521,6 +1523,7 @@ int main () 
doc.add( 'FEATURE.gst', int( not options.disable_gst )) doc.add( 'FEATURE.ff.mpeg2', int( options.enable_ff_mpeg2 )) doc.add( 'FEATURE.opencl', int( options.enable_opencl )) + doc.add( 'FEATURE.hwd', int( options.enable_hwd )) doc.add( 'FEATURE.xcode', int( not (Tools.xcodebuild.fail or options.disable_xcode or options.cross) )) if not Tools.xcodebuild.fail and not options.disable_xcode: diff --git a/test/module.defs b/test/module.defs index 246b62a94..ba7eaf5a4 100644 --- a/test/module.defs +++ b/test/module.defs @@ -48,6 +48,9 @@ ifeq (1,$(FEATURE.opencl)) TEST.GCC.l += OpenCL TEST.GCC.D += USE_OPENCL endif +ifeq (1,$(FEATURE.hwd)) + TEST.GCC.D += USE_HWD +endif TEST.GCC.l += pthreadGC2 iconv ws2_32 TEST.GCC.D += PTW32_STATIC_LIB TEST.GCC.args.extra.exe++ += -static diff --git a/test/test case/handbrake_test.txt b/test/test case/handbrake_test.txt index 47fb0dd8e..a0e9397a1 100644 --- a/test/test case/handbrake_test.txt +++ b/test/test case/handbrake_test.txt @@ -7,74 +7,61 @@ #- the names are used as test names and file names
# universal
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-preset=fast --x264-profile=baseline --h264-level="3.0" --verbose=1
# iPod
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -I -w 320 -l 240 -e x264 -b 700 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x level=30:bframes=0:weightp=0:cabac=0:ref=1:vbv-maxrate=768:vbv-bufsize=2000:analyse=all:me=umh:no-fast-pskip=1:subq=6:8x8dct=0:trellis=0 --verbose=1
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -I -P -U -w 320 -l 180 --modulus 2 -e x264 -q 22 -r 30 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=baseline --h264-level="1.3" --verbose=1
# iPhone_iPod_Touch
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 320 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
-
-
-# iPhone_4
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -4 -P -U -w 960 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
# iPad
+ -i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1024 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
-
-# Apple_TV
-
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:b-pyramid=none:b-adapt=2:weightb=0:trellis=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1
-
-# Apple_TV_2
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1280 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
-
-# Android_Mid
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 270 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
-
-
-# Android_High
- -i "infile" -t 1 -o "outfile.mp4" -f mp4 -w 720 --loose-anamorphic -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x weightp=0:cabac=0 --verbose=1
+# AppleTV
+ -i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U -w 960 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" -x cabac=0:ref=2:b-pyramid=none:weightb=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1
+# AppleTV2
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
-# Normal_1080p_to_1080p_fixed_qp
--i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# AppleTV3
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="4.0" --verbose=1
+# Android
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 128 -D 0 --gain 0 --audio-fallback ffac3 --x264-profile=main --h264-level="2.2" --verbose=1
-# Normal_1080p_to_720p_fixed_qp
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# Android_Tablet
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 128 -D 0 --gain 0 --audio-fallback ffac3 --x264-profile=main --h264-level="3.1" --verbose=1
-# Normal_1080p_to_1080p_13_mbps
--i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -b 13000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# Normal_to_480p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
+# Normal_to_720p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
-# Normal_1080p_to_720p_6_mbps
- -i "infile" -t 1 -c 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
-# high_1080p_to_1080p_fixed_qp
--i "infile" -t 1 -o "high_1080p.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# Normal_to_1080p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
+# Normal_to_1080p_qp
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1920 -l 1080 --custom-anamorphic --display-width 1920 --keep-display-aspect --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
-# high_1080p_to_720p_fixed qp
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_480p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# high_1080p_to_1080p_13_mbps
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -b 13000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_720p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# high_1080p_to_720p_6mbps
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_1080p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# strange_resolution
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 1078 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
+# High Profile_to_1080p_qp
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 -l 1080 --custom-anamorphic --display-width 720 --pixel-aspect 720:480 --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
#-end of script
diff --git a/test/test case/readme.txt b/test/test case/readme.txt index 8c182e26c..87c91201f 100644 --- a/test/test case/readme.txt +++ b/test/test case/readme.txt @@ -37,7 +37,9 @@ The handbrake_test.txt is a "script" file that contains the individual tests to blank lines are ignored.
-To enable UVD decoding, add -P -U to each execution line in handbrake_test.txt.
+
+To enable OpenCL, add -P to each execution line in handbrake_test.txt.
+To enable hardware decoding, add -U to each execution line in handbrake_test.txt.
Example:
# universal
-i "infile" -t 1 -o "outfile.m4v" -P -U -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1
diff --git a/test/test.c b/test/test.c index 82797fd6e..42c53ab8d 100644 --- a/test/test.c +++ b/test/test.c @@ -137,7 +137,7 @@ static int64_t stop_at_pts = 0; static int stop_at_frame = 0; static uint64_t min_title_duration = 10; static int use_opencl = 0; -static int use_uvd = 0; +static int use_hwd = 0; /* Exit cleanly on Ctrl-C */ static volatile int die = 0; @@ -218,7 +218,8 @@ int main( int argc, char ** argv ) h = hb_init( debug, update ); hb_dvd_set_dvdnav( dvdnav ); #ifdef USE_OPENCL - hb_get_opencl_env(); + if ( use_opencl ) + hb_get_opencl_env(); #endif /* Show version */ fprintf( stderr, "%s - %s - %s\n", @@ -260,7 +261,7 @@ int main( int argc, char ** argv ) titleindex = 0; } - hb_set_gui_info(&hb_gui, use_uvd, use_opencl, titleindex); + hb_set_gui_info(&hb_gui, use_hwd, use_opencl, titleindex); hb_scan( h, input, titleindex, preview_count, store_previews, min_title_duration * 90000LL ); /* Wait... */ @@ -429,10 +430,10 @@ static void PrintTitleInfo( hb_title_t * title, int feature ) fprintf( stderr, " + support opencl: yes\n"); else fprintf( stderr, " + support opencl: no\n"); - if (title->uvd_support) - fprintf( stderr, " + support uvd: yes\n"); + if (title->hwd_support) + fprintf( stderr, " + support hwd: yes\n"); else - fprintf( stderr, " + support uvd: no\n"); + fprintf( stderr, " + support hwd: no\n"); fprintf( stderr, " + chapters:\n" ); for( i = 0; i < hb_list_count( title->list_chapter ); i++ ) { @@ -1412,9 +1413,9 @@ static int HandleEvents( hb_handle_t * h ) job->maxWidth = maxWidth; if (maxHeight) job->maxHeight = maxHeight; - if (use_uvd) + if (use_hwd) { - job->use_uvd = use_uvd; + job->use_hwd = use_hwd; } switch( anamorphic_mode ) @@ -1588,13 +1589,8 @@ static int HandleEvents( hb_handle_t * h ) filter_str = hb_strdup_printf("%d:%d:%d:%d:%d:%d", job->width, job->height, job->crop[0], job->crop[1], job->crop[2], job->crop[3] ); - -#ifdef USE_OPENCL - if ( use_opencl ) - filter = hb_filter_init( HB_FILTER_CROP_SCALE_ACCL ); - else 
-#endif - filter = hb_filter_init( HB_FILTER_CROP_SCALE ); + + filter = hb_filter_init( HB_FILTER_CROP_SCALE ); hb_add_filter( job, filter, filter_str ); free( filter_str ); @@ -3251,7 +3247,7 @@ static int ParseOptions( int argc, char ** argv ) { "optimize", no_argument, NULL, 'O' }, { "ipod-atom", no_argument, NULL, 'I' }, { "use-opencl", no_argument, NULL, 'P' }, - { "use-uvd", no_argument, NULL, 'U' }, + { "use-hwd", no_argument, NULL, 'U' }, { "title", required_argument, NULL, 't' }, { "min-duration",required_argument, NULL, MIN_DURATION }, @@ -3416,7 +3412,7 @@ static int ParseOptions( int argc, char ** argv ) use_opencl = 1; break; case 'U': - use_uvd = 1; + use_hwd = 1; break; case 't': diff --git a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs index 5c9f166e0..f9053a8bb 100644 --- a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs +++ b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs @@ -88,7 +88,7 @@ namespace HandBrake.ApplicationServices.Model this.IncludeChapterMarkers = task.IncludeChapterMarkers;
this.IPod5GSupport = task.IPod5GSupport;
this.OpenCLSupport = task.OpenCLSupport;
- this.UVDSupport = task.UVDSupport;
+ this.HWDSupport = task.HWDSupport;
this.KeepDisplayAspect = task.KeepDisplayAspect;
this.LargeFile = task.LargeFile;
this.MaxHeight = task.MaxHeight;
@@ -197,9 +197,9 @@ namespace HandBrake.ApplicationServices.Model public bool OpenCLSupport { get; set; }
/// <summary>
- /// Gets or sets a value indicating whether UVDSupport.
+ /// Gets or sets a value indicating whether HWDSupport.
/// </summary>
- public bool UVDSupport { get; set; }
+ public bool HWDSupport { get; set; }
#endregion
#region Picture
diff --git a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs index 466a33aa1..00488eac8 100644 --- a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs +++ b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs @@ -121,9 +121,9 @@ namespace HandBrake.ApplicationServices.Parsing public int OpenCLSupport { get; set; }
/// <summary>
- /// Gets or sets the UVD
+ /// Gets or sets the HWD
/// </summary>
- public int UVDSupport { get; set; }
+ public int HWDSupport { get; set; }
#endregion
/// <summary>
@@ -236,14 +236,14 @@ namespace HandBrake.ApplicationServices.Parsing }
nextLine = output.ReadLine();
- m = Regex.Match(nextLine, @"^ \+ support uvd:");
+ m = Regex.Match(nextLine, @"^ \+ support hwd:");
if (m.Success)
{
- temp = nextLine.Replace("+ support uvd:", string.Empty).Trim();
+ temp = nextLine.Replace("+ support hwd:", string.Empty).Trim();
if (string.Compare(temp, "yes") == 0)
- thisTitle.UVDSupport = 1;
+ thisTitle.HWDSupport = 1;
else
- thisTitle.UVDSupport = 0;
+ thisTitle.HWDSupport = 0;
}
thisTitle.Chapters.AddRange(Chapter.ParseList(output));
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs index 5cb0d4768..618dd43cc 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs @@ -102,7 +102,7 @@ namespace HandBrake.ApplicationServices.Utilities profile.Height = work.Height.HasValue ? work.Height.Value : 0;
profile.IPod5GSupport = work.IPod5GSupport;
profile.OpenCLGSupport = work.OpenCLSupport;
- profile.UVDSupport = work.UVDSupport;
+ profile.HWDSupport = work.HWDSupport;
profile.IncludeChapterMarkers = work.IncludeChapterMarkers;
profile.KeepDisplayAspect = work.KeepDisplayAspect;
profile.LargeFile = work.LargeFile;
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs index 260abfe54..e22345134 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs @@ -142,7 +142,7 @@ namespace HandBrake.ApplicationServices.Utilities AddEncodeElement(xmlWriter, "Mp4LargeFile", "integer", parsed.LargeFile ? "1" : "0");
AddEncodeElement(xmlWriter, "Mp4iPodCompatible", "integer", parsed.IPod5GSupport ? "1" : "0");
AddEncodeElement(xmlWriter, "OpenCLSupport", "integer", parsed.OpenCLSupport ? "1" : "0");
- AddEncodeElement(xmlWriter, "UVDSupport", "integer", parsed.UVDSupport ? "1" : "0");
+ AddEncodeElement(xmlWriter, "HWDSupport", "integer", parsed.HWDSupport ? "1" : "0");
AddEncodeElement(xmlWriter, "PictureAutoCrop", "integer", "1");
AddEncodeElement(xmlWriter, "PictureBottomCrop", "integer", parsed.Cropping.Bottom.ToString());
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs index 69cbfc258..6cdf1d900 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs @@ -246,7 +246,7 @@ namespace HandBrake.ApplicationServices.Utilities if (task.OpenCLSupport)
query += " -P ";
- if (task.UVDSupport)
+ if (task.HWDSupport)
query += " -U ";
return query;
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs index a167801df..9655d6629 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs @@ -57,7 +57,7 @@ namespace HandBrake.ApplicationServices.Utilities Match largerMp4 = Regex.Match(input, @" -4");
Match ipodAtom = Regex.Match(input, @" -I");
Match openclSupport = Regex.Match(input, @" -P");
- Match uvdSupport = Regex.Match(input, @" -U");
+ Match hwdSupport = Regex.Match(input, @" -U");
// Picture Settings Tab
Match width = Regex.Match(input, @"-w ([0-9]+)");
@@ -158,7 +158,7 @@ namespace HandBrake.ApplicationServices.Utilities parsed.IPod5GSupport = ipodAtom.Success;
parsed.OptimizeMP4 = optimizeMP4.Success;
parsed.OpenCLSupport = openclSupport.Success;
- parsed.UVDSupport = uvdSupport.Success;
+ parsed.HWDSupport = hwdSupport.Success;
#endregion
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs index 74f1683a9..06bf022f3 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs @@ -1448,7 +1448,7 @@ namespace HandBrake.Interop nativeJob.mp4_optimize = profile.Optimize ? 1 : 0;
nativeJob.ipod_atom = profile.IPod5GSupport ? 1 : 0;
nativeJob.opencl_support = profile.OpenCLGSupport ? 1 : 0;
- nativeJob.uvd_support = profile.UVDSupport ? 1 : 0;
+ nativeJob.hwd_support = profile.HWDSupport ? 1 : 0;
if (title.AngleCount > 1)
{
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs index a761780fa..562531083 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs @@ -149,7 +149,7 @@ namespace HandBrake.Interop.HbLib public int opencl_support;
/// int
- public int uvd_support;
+ public int hwd_support;
/// int
public int indepth_scan;
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs index 9667d5e6e..937c776a7 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs @@ -28,7 +28,7 @@ namespace HandBrake.Interop.Model.Encoding public bool Optimize { get; set; }
public bool IPod5GSupport { get; set; }
public bool OpenCLGSupport { get; set; }
- public bool UVDSupport { get; set; }
+ public bool HWDSupport { get; set; }
public int Width { get; set; }
public int Height { get; set; }
@@ -85,7 +85,7 @@ namespace HandBrake.Interop.Model.Encoding Optimize = this.Optimize,
IPod5GSupport = this.IPod5GSupport,
OpenCLGSupport = this.OpenCLGSupport,
- UVDSupport = this.UVDSupport,
+ HWDSupport = this.HWDSupport,
Width = this.Width,
Height = this.Height,
diff --git a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs index e814e897c..ad09f3043 100644 --- a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs +++ b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs @@ -377,7 +377,7 @@ namespace HandBrakeWPF.ViewModels this.CurrentTask.OptimizeMP4 = selectedPreset.Task.OptimizeMP4;
this.CurrentTask.IPod5GSupport = selectedPreset.Task.IPod5GSupport;
this.CurrentTask.OpenCLSupport = selectedPreset.Task.OpenCLSupport;
- this.CurrentTask.UVDSupport = selectedPreset.Task.UVDSupport;
+ this.CurrentTask.HWDSupport = selectedPreset.Task.HWDSupport;
this.SelectedOutputFormat = selectedPreset.Task.OutputFormat;
// Tab Settings
@@ -1697,7 +1697,7 @@ namespace HandBrakeWPF.ViewModels {
this.SupportOpenCL = false;
}
- if (this.selectedTitle.UVDSupport == 0)
+ if (this.selectedTitle.HWDSupport == 0)
{
this.SupportHardwareDecoding = true;
}
diff --git a/win/CS/HandBrakeWPF/Views/MainView.xaml b/win/CS/HandBrakeWPF/Views/MainView.xaml index 295ebb4c1..0f85a2b73 100644 --- a/win/CS/HandBrakeWPF/Views/MainView.xaml +++ b/win/CS/HandBrakeWPF/Views/MainView.xaml @@ -482,11 +482,11 @@ Converter={StaticResource boolToVisConverter},
ConverterParameter=true}"
/>
- <CheckBox Name="UVD"
+ <CheckBox Name="HWD"
Margin="8,0,0,0"
VerticalAlignment="Center"
- Content="UVD Support"
- IsChecked="{Binding Path=CurrentTask.UVDSupport}" IsEnabled="True"
+ Content="HWD Support"
+ IsChecked="{Binding Path=CurrentTask.HWDSupport}" IsEnabled="True"
Visibility="{Binding SupportHardwareDecoding,
Converter={StaticResource boolToVisConverter},
ConverterParameter=true}"
|