summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhandbrake <[email protected]>2013-01-31 08:18:55 +0000
committerhandbrake <[email protected]>2013-01-31 08:18:55 +0000
commitbc3b56381c6ac214b7736a00972c9d0902902453 (patch)
tree6caffbe88ef2ee5efbd2279cf00556a986a7c22e
parent39821495c82b104b77e628d5cb244be041e285ed (diff)
add a new compile option: --enable-hwd
change the GUI uvd checkbox's name to hardware decoder modify the issue compile on linux modify the issue on Intel platform with Intel integrate graphic add a new opencl scale algorithm, PSNR goes up (added files: scale.h, scale.c, scale_kernel.h, scale_kernel.c) merge the cropscaleaccl.c to cropscale.c merge the decavcodecaccl.c to decavcodec.c git-svn-id: svn://svn.handbrake.fr/HandBrake/branches/opencl@5227 b64f7644-9d1e-0410-96f1-a4d463321fa5
-rw-r--r--contrib/ffmpeg/module.defs10
-rw-r--r--libhb/common.c13
-rw-r--r--libhb/common.h13
-rw-r--r--libhb/cropscale.c171
-rw-r--r--libhb/cropscaleaccl.c262
-rw-r--r--libhb/decavcodec.c178
-rw-r--r--libhb/decavcodecaccl.c1311
-rw-r--r--libhb/dxva2api.c2
-rw-r--r--libhb/dxva2api.h4
-rw-r--r--libhb/hb.c4
-rw-r--r--libhb/internal.h1
-rw-r--r--libhb/module.defs6
-rw-r--r--libhb/oclnv12toyuv.c2
-rw-r--r--libhb/oclnv12toyuv.h4
-rw-r--r--libhb/openclkernels.h374
-rw-r--r--libhb/openclwrapper.c56
-rw-r--r--libhb/openclwrapper.h11
-rw-r--r--libhb/scale.c1199
-rw-r--r--libhb/scale.h310
-rw-r--r--libhb/scale_kernel.c215
-rw-r--r--libhb/scale_kernel.h6
-rw-r--r--libhb/stream.c25
-rw-r--r--libhb/vadxva2.c144
-rw-r--r--libhb/vadxva2.h21
-rw-r--r--libhb/work.c22
-rw-r--r--make/configure.py3
-rw-r--r--test/module.defs3
-rw-r--r--test/test case/handbrake_test.txt73
-rw-r--r--test/test case/readme.txt4
-rw-r--r--test/test.c30
-rw-r--r--win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs6
-rw-r--r--win/CS/HandBrake.ApplicationServices/Parsing/Title.cs12
-rw-r--r--win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs2
-rw-r--r--win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs2
-rw-r--r--win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs2
-rw-r--r--win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs4
-rw-r--r--win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs2
-rw-r--r--win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs2
-rw-r--r--win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs4
-rw-r--r--win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs4
-rw-r--r--win/CS/HandBrakeWPF/Views/MainView.xaml6
41 files changed, 2740 insertions, 1783 deletions
diff --git a/contrib/ffmpeg/module.defs b/contrib/ffmpeg/module.defs
index 01fd5f753..435ce778b 100644
--- a/contrib/ffmpeg/module.defs
+++ b/contrib/ffmpeg/module.defs
@@ -20,11 +20,6 @@ FFMPEG.CONFIGURE.extra = \
--disable-network \
--disable-hwaccels \
--disable-encoders \
- --enable-dxva2 \
- --enable-hwaccel=h264_dxva2 \
- --enable-hwaccel=mpeg2_dxva2 \
- --enable-hwaccel=vc1_dxva2 \
- --enable-hwaccel=wmv3_dxva2 \
--enable-encoder=aac \
--enable-encoder=ac3 \
--enable-encoder=flac \
@@ -47,6 +42,11 @@ else ifeq (1-mingw,$(BUILD.cross)-$(BUILD.system))
FFMPEG.CONFIGURE.extra += \
--enable-w32threads \
--enable-memalign-hack \
+ --enable-dxva2 \
+ --enable-hwaccel=h264_dxva2 \
+ --enable-hwaccel=mpeg2_dxva2 \
+ --enable-hwaccel=vc1_dxva2 \
+ --enable-hwaccel=wmv3_dxva2 \
--target-os=mingw32 \
--arch=i386 \
--enable-cross-compile --cross-prefix=$(BUILD.cross.prefix)
diff --git a/libhb/common.c b/libhb/common.c
index eccc00354..c10eae93d 100644
--- a/libhb/common.c
+++ b/libhb/common.c
@@ -1739,6 +1739,7 @@ static void job_setup( hb_job_t * job, hb_title_t * title )
job->list_attachment = hb_attachment_list_copy( title->list_attachment );
job->metadata = hb_metadata_copy( title->metadata );
+ job->use_hw_decode = 0;
}
static void job_clean( hb_job_t * job )
@@ -1993,11 +1994,7 @@ hb_filter_object_t * hb_filter_init( int filter_id )
case HB_FILTER_CROP_SCALE:
filter = &hb_filter_crop_scale;
break;
-#ifdef USE_OPENCL
- case HB_FILTER_CROP_SCALE_ACCL:
- filter = &hb_filter_crop_scale_accl;
- break;
-#endif
+
case HB_FILTER_ROTATE:
filter = &hb_filter_rotate;
break;
@@ -2974,15 +2971,15 @@ int hb_use_dxva( hb_title_t * title )
int hb_get_gui_info(hb_gui_t * gui, int option)
{
if ( option == 1 )
- return gui->use_uvd;
+ return gui->use_hwd;
else if ( option == 2 )
return gui->use_opencl;
else
return gui->title_scan;
}
-void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan)
+void hb_set_gui_info(hb_gui_t *gui, int hwd, int opencl, int titlescan)
{
- gui->use_uvd = uvd;
+ gui->use_hwd = hwd;
gui->use_opencl = opencl;
gui->title_scan = titlescan;
}
diff --git a/libhb/common.h b/libhb/common.h
index 3d5f38f15..8586d2a6b 100644
--- a/libhb/common.h
+++ b/libhb/common.h
@@ -148,10 +148,10 @@ int hb_subtitle_can_pass( int source, int mux );
hb_attachment_t *hb_attachment_copy(const hb_attachment_t *src);
int hb_get_gui_info(hb_gui_t *gui, int option);
-void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan);
+void hb_set_gui_info(hb_gui_t *gui, int hwd, int opencl, int titlescan);
struct hb_gui_s
{
- int use_uvd;
+ int use_hwd;
int use_opencl;
int title_scan;
};
@@ -441,7 +441,8 @@ struct hb_job_s
// initially (for frame accurate positioning
// to non-I frames).
int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */
- int use_uvd;
+ int use_hwd;
+ int use_hw_decode;
#ifdef __LIBHB__
/* Internal data */
@@ -786,7 +787,7 @@ struct hb_title_s
uint32_t flags;
// set if video stream doesn't have IDR frames
int opencl_support;
- int uvd_support;
+ int hwd_support;
#define HBTF_NO_IDR (1 << 0)
#define HBTF_SCAN_COMPLETE (1 << 0)
};
@@ -1086,9 +1087,7 @@ enum
HB_FILTER_DENOISE,
HB_FILTER_RENDER_SUB,
HB_FILTER_CROP_SCALE,
-#ifdef USE_OPENCL
- HB_FILTER_CROP_SCALE_ACCL,
-#endif
+
// Finally filters that don't care what order they are in,
// except that they must be after the above filters
HB_FILTER_ROTATE,
diff --git a/libhb/cropscale.c b/libhb/cropscale.c
index 61aa85101..e0a3e2af3 100644
--- a/libhb/cropscale.c
+++ b/libhb/cropscale.c
@@ -9,6 +9,8 @@
#include "hb.h"
#include "hbffmpeg.h"
+#include "common.h"
+
struct hb_filter_private_s
{
@@ -19,7 +21,13 @@ struct hb_filter_private_s
int width_out;
int height_out;
int crop[4];
+
+#ifdef USE_OPENCL
int use_dxva;
+ int title_width;
+ int title_height;
+ hb_oclscale_t * os; //ocl scaler handler
+#endif
struct SwsContext * context;
};
@@ -59,6 +67,16 @@ static int hb_crop_scale_init( hb_filter_object_t * filter,
pv->height_in = init->height;
pv->width_out = init->width;
pv->height_out = init->height;
+#ifdef USE_OPENCL
+ pv->use_dxva = init->use_dxva;
+ if ( hb_get_gui_info(&hb_gui, 2) )
+ {
+ pv->title_width = init->title_width;
+ pv->title_height = init->title_height;
+ pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
+ memset( pv->os, 0, sizeof( hb_oclscale_t ) );
+ }
+#endif
memcpy( pv->crop, init->crop, sizeof( int[4] ) );
if( filter->settings )
{
@@ -95,6 +113,30 @@ static int hb_crop_scale_info( hb_filter_object_t * filter,
info->out.height = pv->height_out;
memcpy( info->out.crop, pv->crop, sizeof( int[4] ) );
+#ifdef USE_OPENCL
+ if ( hb_get_gui_info(&hb_gui, 2) )
+ {
+ int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] );
+ int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] );
+
+ sprintf( info->human_readable_desc,
+ "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d",
+ pv->title_width, pv->title_height,
+ pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3],
+ cropped_width, cropped_height, pv->width_out, pv->height_out );
+ }
+ else
+ {
+ int cropped_width = pv->width_in - ( pv->crop[2] + pv->crop[3] );
+ int cropped_height = pv->height_in - ( pv->crop[0] + pv->crop[1] );
+
+ sprintf( info->human_readable_desc,
+ "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d",
+ pv->width_in, pv->height_in,
+ pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3],
+ cropped_width, cropped_height, pv->width_out, pv->height_out );
+ }
+#else
int cropped_width = pv->width_in - ( pv->crop[2] + pv->crop[3] );
int cropped_height = pv->height_in - ( pv->crop[0] + pv->crop[1] );
@@ -103,7 +145,7 @@ static int hb_crop_scale_info( hb_filter_object_t * filter,
pv->width_in, pv->height_in,
pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3],
cropped_width, cropped_height, pv->width_out, pv->height_out );
-
+#endif
return 0;
}
@@ -115,7 +157,23 @@ static void hb_crop_scale_close( hb_filter_object_t * filter )
{
return;
}
-
+#ifdef USE_OPENCL
+ if ( hb_get_gui_info(&hb_gui, 2) && pv->os)
+ {
+ CL_FREE( pv->os->h_in_buf );
+ CL_FREE( pv->os->h_out_buf );
+ CL_FREE( pv->os->v_out_buf );
+ CL_FREE( pv->os->h_coeff_y );
+ CL_FREE( pv->os->h_coeff_uv );
+ CL_FREE( pv->os->h_index_y );
+ CL_FREE( pv->os->h_index_uv );
+ CL_FREE( pv->os->v_coeff_y );
+ CL_FREE( pv->os->v_coeff_uv );
+ CL_FREE( pv->os->v_index_y );
+ CL_FREE( pv->os->v_index_uv );
+ free( pv->os );
+ }
+#endif
if ( pv->context )
{
sws_freeContext( pv->context );
@@ -124,7 +182,24 @@ static void hb_crop_scale_close( hb_filter_object_t * filter )
free( pv );
filter->private_data = NULL;
}
-
+#ifdef USE_OPENCL
+static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h )
+{
+ if( dstride == sstride )
+ {
+ memcpy( dst, src, dstride * h );
+ return dst + dstride * h;
+ }
+ int lbytes = dstride <= sstride ? dstride : sstride;
+ while( --h >= 0 )
+ {
+ memcpy( dst, src, lbytes );
+ src += sstride;
+ dst += dstride;
+ }
+ return dst;
+}
+#endif
static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
{
AVPicture pic_in;
@@ -141,6 +216,76 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
av_picture_crop( &pic_crop, &pic_in, in->f.fmt,
pv->crop[0], pv->crop[2] );
+#ifdef USE_OPENCL
+ if ( hb_get_gui_info(&hb_gui, 2) )
+ {
+ int w = in->f.width - ( pv->crop[2] + pv->crop[3] );
+ int h = in->f.height - ( pv->crop[0] + pv->crop[1] );
+ uint8_t *tmp_in = malloc( w * h * 3 / 2 );
+ uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 );
+ if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] )
+ {
+ int i;
+ for( i = 0; i< h>>1; i++ )
+ {
+ memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w );
+ memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w );
+ memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 );
+ memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 );
+ }
+ }
+ else
+ {
+ memcpy( tmp_in, pic_crop.data[0], w * h );
+ memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 );
+ memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 );
+ }
+ hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os );
+ w = out->plane[0].stride;
+ h = out->plane[0].height;
+ uint8_t *dst = out->plane[0].data;
+ copy_plane( dst, tmp_out, w, pv->width_out, h );
+ w = out->plane[1].stride;
+ h = out->plane[1].height;
+ dst = out->plane[1].data;
+ copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h );
+ w = out->plane[2].stride;
+ h = out->plane[2].height;
+ dst = out->plane[2].data;
+ copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h );
+ free( tmp_out );
+ free( tmp_in );
+ }
+ else
+ {
+ if ( !pv->context ||
+ pv->width_in != in->f.width ||
+ pv->height_in != in->f.height ||
+ pv->pix_fmt != in->f.fmt )
+ {
+ // Something changed, need a new scaling context.
+ if( pv->context )
+ sws_freeContext( pv->context );
+ pv->context = hb_sws_get_context(
+ in->f.width - (pv->crop[2] + pv->crop[3]),
+ in->f.height - (pv->crop[0] + pv->crop[1]),
+ in->f.fmt,
+ out->f.width, out->f.height, out->f.fmt,
+ SWS_LANCZOS | SWS_ACCURATE_RND );
+ pv->width_in = in->f.width;
+ pv->height_in = in->f.height;
+ pv->pix_fmt = in->f.fmt;
+ }
+
+ // Scale pic_crop into pic_render according to the
+ // context set up above
+ sws_scale(pv->context,
+ (const uint8_t* const*)pic_crop.data,
+ pic_crop.linesize,
+ 0, in->f.height - (pv->crop[0] + pv->crop[1]),
+ pic_out.data, pic_out.linesize);
+ }
+#else
if ( !pv->context ||
pv->width_in != in->f.width ||
pv->height_in != in->f.height ||
@@ -168,7 +313,7 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
pic_crop.linesize,
0, in->f.height - (pv->crop[0] + pv->crop[1]),
pic_out.data, pic_out.linesize);
-
+#endif
out->s = in->s;
hb_buffer_move_subs( out, in );
return out;
@@ -202,6 +347,17 @@ static int hb_crop_scale_work( hb_filter_object_t * filter,
pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]);
pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]);
}
+#ifdef USE_OPENCL
+ if ( (in->f.fmt == pv->pix_fmt_out &&
+ !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
+ in->f.width == pv->width_out && in->f.height == pv->height_out) ||
+ (pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out) )
+ {
+ *buf_out = in;
+ *buf_in = NULL;
+ return HB_FILTER_OK;
+ }
+#else
if ( in->f.fmt == pv->pix_fmt_out &&
!pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
in->f.width == pv->width_out && in->f.height == pv->height_out )
@@ -210,13 +366,6 @@ static int hb_crop_scale_work( hb_filter_object_t * filter,
*buf_in = NULL;
return HB_FILTER_OK;
}
-#ifdef USE_OPENCL
- if ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out )
- {
- *buf_out = in;
- *buf_in = NULL;
- return HB_FILTER_OK;
- }
#endif
*buf_out = crop_scale( pv, in );
diff --git a/libhb/cropscaleaccl.c b/libhb/cropscaleaccl.c
deleted file mode 100644
index b254acde0..000000000
--- a/libhb/cropscaleaccl.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* cropscaleaccl.c
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
- */
-#ifdef USE_OPENCL
-#include "hb.h"
-#include "common.h"
-
-struct hb_filter_private_s
-{
- int width_in;
- int height_in;
- int pix_fmt;
- int pix_fmt_out;
- int width_out;
- int height_out;
- int crop[4];
- int use_dxva;
-
- int title_width;
- int title_height;
- hb_oclscale_t * os; //ocl scaler handler
- struct SwsContext * context;
-};
-
-static int hb_crop_scale_init( hb_filter_object_t * filter,
- hb_filter_init_t * init );
-
-static int hb_crop_scale_work( hb_filter_object_t * filter,
- hb_buffer_t ** buf_in,
- hb_buffer_t ** buf_out );
-
-static int hb_crop_scale_info( hb_filter_object_t * filter,
- hb_filter_info_t * info );
-
-static void hb_crop_scale_close( hb_filter_object_t * filter );
-
-hb_filter_object_t hb_filter_crop_scale_accl =
-{
- .id = HB_FILTER_CROP_SCALE_ACCL,
- .enforce_order = 1,
- .name = "Custom Crop and Scale",
- .settings = NULL,
- .init = hb_crop_scale_init,
- .work = hb_crop_scale_work,
- .close = hb_crop_scale_close,
- .info = hb_crop_scale_info,
-};
-
-static int hb_crop_scale_init( hb_filter_object_t * filter,
- hb_filter_init_t * init )
-{
- filter->private_data = calloc( 1, sizeof(struct hb_filter_private_s) );
- hb_filter_private_t * pv = filter->private_data;
-
- // TODO: add pix format option to settings
- pv->pix_fmt_out = init->pix_fmt;
- pv->width_in = init->width;
- pv->height_in = init->height;
- pv->width_out = init->width;
- pv->height_out = init->height;
- pv->use_dxva = init->use_dxva;
- pv->title_width = init->title_width;
- pv->title_height = init->title_height;
- memcpy( pv->crop, init->crop, sizeof( int[4] ) );
- if( filter->settings )
- {
- sscanf( filter->settings, "%d:%d:%d:%d:%d:%d",
- &pv->width_out, &pv->height_out,
- &pv->crop[0], &pv->crop[1], &pv->crop[2], &pv->crop[3] );
- }
- // Set init values so the next stage in the pipline
- // knows what it will be getting
- init->pix_fmt = pv->pix_fmt;
- init->width = pv->width_out;
- init->height = pv->height_out;
- memcpy( init->crop, pv->crop, sizeof( int[4] ) );
- pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
- memset( pv->os, 0, sizeof( hb_oclscale_t ) );
- return 0;
-}
-
-static int hb_crop_scale_info( hb_filter_object_t * filter,
- hb_filter_info_t * info )
-{
- hb_filter_private_t * pv = filter->private_data;
-
- if( !pv )
- return 0;
-
- // Set init values so the next stage in the pipline
- // knows what it will be getting
- memset( info, 0, sizeof( hb_filter_info_t ) );
- info->out.pix_fmt = pv->pix_fmt;
- info->out.width = pv->width_out;
- info->out.height = pv->height_out;
- memcpy( info->out.crop, pv->crop, sizeof( int[4] ) );
-
- int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] );
- int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] );
-
- sprintf( info->human_readable_desc,
- "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d",
- pv->title_width, pv->title_height,
- pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3],
- cropped_width, cropped_height, pv->width_out, pv->height_out );
-
- return 0;
-}
-
-static void hb_crop_scale_close( hb_filter_object_t * filter )
-{
- hb_filter_private_t * pv = filter->private_data;
-
- if( !pv )
- {
- return;
- }
- if ( pv->os )
- {
- CL_FREE( pv->os->h_in_buf );
- CL_FREE( pv->os->h_out_buf );
- CL_FREE( pv->os->v_out_buf );
- CL_FREE( pv->os->h_coeff_y );
- CL_FREE( pv->os->h_coeff_uv );
- CL_FREE( pv->os->h_index_y );
- CL_FREE( pv->os->h_index_uv );
- CL_FREE( pv->os->v_coeff_y );
- CL_FREE( pv->os->v_coeff_uv );
- CL_FREE( pv->os->v_index_y );
- CL_FREE( pv->os->v_index_uv );
- free( pv->os );
- }
- free( pv );
- filter->private_data = NULL;
-}
-
-static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h )
-{
- if( dstride == sstride )
- {
- memcpy( dst, src, dstride * h );
- return dst + dstride * h;
- }
- int lbytes = dstride <= sstride ? dstride : sstride;
- while( --h >= 0 )
- {
- memcpy( dst, src, lbytes );
- src += sstride;
- dst += dstride;
- }
- return dst;
-}
-
-static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
-{
- AVPicture pic_in;
- AVPicture pic_out;
- AVPicture pic_crop;
- hb_buffer_t * out;
- out = hb_video_buffer_init( pv->width_out, pv->height_out );
-
- hb_avpicture_fill( &pic_in, in );
- hb_avpicture_fill( &pic_out, out );
-
- // Crop; this alters the pointer to the data to point to the
- // correct place for cropped frame
- av_picture_crop( &pic_crop, &pic_in, in->f.fmt,
- pv->crop[0], pv->crop[2] );
-
-
- int w = in->f.width - ( pv->crop[2] + pv->crop[3] );
- int h = in->f.height - ( pv->crop[0] + pv->crop[1] );
- uint8_t *tmp_in = malloc( w * h * 3 / 2 );
- uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 );
- if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] )
- {
- int i;
- for( i = 0; i< h>>1; i++ )
- {
- memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w );
- memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w );
- memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 );
- memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 );
- }
- }
- else
- {
- memcpy( tmp_in, pic_crop.data[0], w * h );
- memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 );
- memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 );
- }
- hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os );
- w = out->plane[0].stride;
- h = out->plane[0].height;
- uint8_t *dst = out->plane[0].data;
- copy_plane( dst, tmp_out, w, pv->width_out, h );
- w = out->plane[1].stride;
- h = out->plane[1].height;
- dst = out->plane[1].data;
- copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h );
- w = out->plane[2].stride;
- h = out->plane[2].height;
- dst = out->plane[2].data;
- copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h );
- free( tmp_out );
- free( tmp_in );
- out->s = in->s;
- hb_buffer_move_subs( out, in );
- return out;
-}
-
-static int hb_crop_scale_work( hb_filter_object_t * filter,
- hb_buffer_t ** buf_in,
- hb_buffer_t ** buf_out )
-{
- hb_filter_private_t * pv = filter->private_data;
- hb_buffer_t * in = *buf_in;
-
- if( in->size <= 0 )
- {
- *buf_out = in;
- *buf_in = NULL;
- return HB_FILTER_DONE;
- }
-
- if( !pv )
- {
- *buf_out = in;
- *buf_in = NULL;
- return HB_FILTER_OK;
- }
-
- // If width or height were not set, set them now based on the
- // input width & height
- if( pv->width_out <= 0 || pv->height_out <= 0 )
- {
- pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]);
- pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]);
- }
- if( ( in->f.fmt == pv->pix_fmt_out &&
- !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
- in->f.width == pv->width_out && in->f.height == pv->height_out ) ||
- ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) )
- {
- *buf_out = in;
- *buf_in = NULL;
- return HB_FILTER_OK;
- }
- *buf_out = crop_scale( pv, in );
-
-
- return HB_FILTER_OK;
-}
-#endif
diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c
index fe59300fe..43457720a 100644
--- a/libhb/decavcodec.c
+++ b/libhb/decavcodec.c
@@ -40,8 +40,13 @@
#include "hb.h"
#include "hbffmpeg.h"
+#include "audio_remap.h"
#include "audio_resample.h"
+#ifdef USE_HWD
+#include "vadxva2.h"
+#endif
+
static void compute_frame_duration( hb_work_private_t *pv );
static void flushDelayQueue( hb_work_private_t *pv );
static int decavcodecaInit( hb_work_object_t *, hb_job_t * );
@@ -99,7 +104,11 @@ struct hb_work_private_s
int sws_pix_fmt;
int cadence[12];
int wait_for_keyframe;
-
+#ifdef USE_HWD
+ hb_va_dxva2_t * dxva2;
+ uint8_t *dst_frame;
+ hb_oclscale_t *os;
+#endif
hb_audio_resample_t *resample;
};
@@ -264,6 +273,32 @@ static void closePrivData( hb_work_private_t ** ppv )
hb_list_empty( &pv->list );
}
hb_audio_resample_free(pv->resample);
+#ifdef USE_HWD
+ if ( pv->os )
+ {
+#ifdef USE_OPENCL
+ CL_FREE( pv->os->h_in_buf );
+ CL_FREE( pv->os->h_out_buf );
+ CL_FREE( pv->os->v_out_buf );
+ CL_FREE( pv->os->h_coeff_y );
+ CL_FREE( pv->os->h_coeff_uv );
+ CL_FREE( pv->os->h_index_y );
+ CL_FREE( pv->os->h_index_uv );
+ CL_FREE( pv->os->v_coeff_y );
+ CL_FREE( pv->os->v_coeff_uv );
+ CL_FREE( pv->os->v_index_y );
+ CL_FREE( pv->os->v_index_uv );
+#endif
+ free( pv->os );
+ }
+ if ( pv->dxva2 )
+ {
+#ifdef USE_OPENCL
+ CL_FREE( pv->dxva2->cl_mem_nv12 );
+#endif
+ hb_va_close( pv->dxva2 );
+ }
+#endif
free( pv );
}
*ppv = NULL;
@@ -272,7 +307,9 @@ static void closePrivData( hb_work_private_t ** ppv )
static void decavcodecClose( hb_work_object_t * w )
{
hb_work_private_t * pv = w->private_data;
-
+#ifdef USE_HWD
+ if( pv->dst_frame ) free( pv->dst_frame );
+#endif
if ( pv )
{
closePrivData( &pv );
@@ -501,6 +538,48 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
w = pv->job->title->width;
h = pv->job->title->height;
}
+#ifdef USE_HWD
+ if (pv->dxva2 && pv->job)
+ {
+ hb_buffer_t *buf;
+ int ww, hh;
+ if( (w > pv->job->width || h > pv->job->height) && (hb_get_gui_info(&hb_gui, 2) == 1) )
+ {
+ buf = hb_video_buffer_init( pv->job->width, pv->job->height );
+ ww = pv->job->width;
+ hh = pv->job->height;
+ }
+ else
+ {
+ buf = hb_video_buffer_init( w, h );
+ ww = w;
+ hh = h;
+ }
+ if( !pv->dst_frame )
+ {
+ pv->dst_frame = malloc( ww * hh * 3 / 2 );
+ }
+ if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR )
+ {
+ hb_log( "hb_va_Extract failed!!!!!!" );
+ }
+ w = buf->plane[0].stride;
+ h = buf->plane[0].height;
+ uint8_t *dst = buf->plane[0].data;
+ copy_plane( dst, pv->dst_frame, w, ww, h );
+ w = buf->plane[1].stride;
+ h = buf->plane[1].height;
+ dst = buf->plane[1].data;
+ copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h );
+ w = buf->plane[2].stride;
+ h = buf->plane[2].height;
+ dst = buf->plane[2].data;
+ copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h );
+ return buf;
+ }
+ else
+ {
+#endif
hb_buffer_t *buf = hb_video_buffer_init( w, h );
uint8_t *dst = buf->data;
@@ -547,10 +626,26 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
}
return buf;
+#ifdef USE_HWD
+}
+#endif
}
static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
{
+#ifdef USE_HWD
+ hb_work_private_t *pv = (hb_work_private_t*)context->opaque;
+ if ( (pv != NULL) && pv->dxva2 )
+ {
+ int result = HB_WORK_ERROR;
+ hb_work_private_t *pv = (hb_work_private_t*)context->opaque;
+ result = hb_va_get_frame_buf( pv->dxva2, context, frame );
+ if( result==HB_WORK_ERROR )
+ return avcodec_default_get_buffer( context, frame );
+ return 0;
+ }
+ else
+#endif
return avcodec_default_get_buffer( context, frame );
}
@@ -763,7 +858,18 @@ static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequen
{
frame_dur += frame.repeat_pict * pv->field_duration;
}
-
+#ifdef USE_HWD
+ if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
+ {
+ if( avp.pts>0 )
+ {
+ if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 )
+ frame.pkt_pts = pv->dxva2->input_pts[0];
+ else
+ frame.pkt_pts = pv->dxva2->input_pts[0]<pv->dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1];
+ }
+ }
+#endif
// If there was no pts for this frame, assume constant frame rate
// video & estimate the next frame time from the last & duration.
double pts;
@@ -949,6 +1055,24 @@ static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
}
return head;
}
+#ifdef USE_HWD
+static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame )
+{
+ hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque;
+ int i;
+ if( p_dec->dxva2 )
+ {
+ hb_va_release( p_dec->dxva2, frame );
+ }
+ else if( !frame->opaque )
+ {
+ if( frame->type == FF_BUFFER_TYPE_INTERNAL )
+ avcodec_default_release_buffer( p_context, frame );
+ }
+ for( i = 0; i < 4; i++ )
+ frame->data[i] = NULL;
+}
+#endif
static void init_video_avcodec_context( hb_work_private_t *pv )
{
@@ -956,6 +1080,10 @@ static void init_video_avcodec_context( hb_work_private_t *pv )
pv->context->opaque = pv;
pv->context->get_buffer = get_frame_buf;
pv->context->reget_buffer = reget_frame_buf;
+#ifdef USE_HWD
+ if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
+ pv->context->release_buffer = hb_ffmpeg_release_frame_buf;
+#endif
}
static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
@@ -990,7 +1118,27 @@ static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
pv->context->workaround_bugs = FF_BUG_AUTODETECT;
pv->context->err_recognition = AV_EF_CRCCHECK;
pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK;
+#ifdef USE_HWD
+ if( ((w->codec_param==AV_CODEC_ID_H264)
+ || (w->codec_param==AV_CODEC_ID_MPEG2VIDEO)
+ || (w->codec_param==AV_CODEC_ID_VC1)
+ || (w->codec_param==AV_CODEC_ID_WMV3)
+ || (w->codec_param==AV_CODEC_ID_MPEG4))
+ && pv->job && job->use_hw_decode)
+ {
+ pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param );
+ if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
+ {
+ hb_va_new_dxva2( pv->dxva2, pv->context );
+ init_video_avcodec_context( pv );
+ pv->context->get_format = hb_ffmpeg_get_format;
+ pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
+ memset( pv->os, 0, sizeof( hb_oclscale_t ) );
+ pv->threads = 1;
+ }
+ }
+#endif
if ( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) )
{
hb_log( "decavcodecvInit: avcodec_open failed" );
@@ -1180,6 +1328,16 @@ static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
pv->new_chap = in->s.new_chap;
pv->chap_time = pts >= 0? pts : pv->pts_next;
}
+#ifdef USE_HWD
+ if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
+ {
+ if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] )
+ pv->dxva2->input_pts[0] = pts;
+ else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] )
+ pv->dxva2->input_pts[1] = pts;
+ pv->dxva2->input_dts = dts;
+ }
+#endif
decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype );
hb_buffer_close( &in );
*buf_out = link_buf_list( pv );
@@ -1421,7 +1579,19 @@ hb_work_object_t hb_decavcodecv =
.info = decavcodecvInfo,
.bsinfo = decavcodecvBSInfo
};
-
+#ifdef USE_HWD
+hb_work_object_t hb_decavcodecv_accl =
+{
+ .id = WORK_DECAVCODECVACCL,
+ .name = "Video hardware decoder (libavcodec)",
+ .init = decavcodecvInit,
+ .work = decavcodecvWork,
+ .close = decavcodecClose,
+ .flush = decavcodecvFlush,
+ .info = decavcodecvInfo,
+ .bsinfo = decavcodecvBSInfo
+};
+#endif
static void decodeAudio(hb_audio_t *audio, hb_work_private_t *pv, uint8_t *data,
int size, int64_t pts)
{
diff --git a/libhb/decavcodecaccl.c b/libhb/decavcodecaccl.c
deleted file mode 100644
index 3842053ec..000000000
--- a/libhb/decavcodecaccl.c
+++ /dev/null
@@ -1,1311 +0,0 @@
-/* decavcodecaccl.c
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
- */
-
-/* This module is Handbrake's interface to the ffmpeg decoder library
- (libavcodec & small parts of libavformat). It contains four Handbrake
- "work objects":
-
- decavcodeca connects HB to an ffmpeg audio decoder
- decavcodecvaccl connects HB to an ffmpeg video decoder
-
- (Two different routines are needed because the ffmpeg library
- has different decoder calling conventions for audio & video.
- These work objects are self-contained & follow all
- of HB's conventions for a decoder module. They can be used like
- any other HB decoder (deca52, decmpeg2, etc.).
-
- These decoders handle 2 kinds of input. Streams that are demuxed
- by HandBrake and streams that are demuxed by libavformat. In the
- case of streams that are demuxed by HandBrake, there is an extra
- parse step required that happens in decodeVideo and decavcodecaWork.
- In the case of streams that are demuxed by libavformat, there is context
- information that we need from the libavformat. This information is
- propagated from hb_stream_open to these decoders through title->opaque_priv.
-
- A consequence of the above is that the streams that are demuxed by HandBrake
- *can't* use information from the AVStream because there isn't one - they
- get their data from either the dvd reader or the mpeg reader, not the ffmpeg
- stream reader. That means that they have to make up for deficiencies in the
- AVCodecContext info by using stuff kept in the HB "title" struct. It
- also means that ffmpeg codecs that randomly scatter state needed by
- the decoder across both the AVCodecContext & the AVStream (e.g., the
- VC1 decoder) can't easily be used by the HB mpeg stream reader.
- */
-#define HAVE_DXVA2
-#ifdef HAVE_DXVA2
-#include "hb.h"
-#include "hbffmpeg.h"
-#include "vadxva2.h"
-#include "audio_remap.h"
-#include "audio_resample.h"
-
-static void compute_frame_duration( hb_work_private_t *pv );
-static void flushDelayQueue( hb_work_private_t *pv );
-
-#define HEAP_SIZE 8
-typedef struct {
- // there are nheap items on the heap indexed 1..nheap (i.e., top of
- // heap is 1). The 0th slot is unused - a marker is put there to check
- // for overwrite errs.
- int64_t h[HEAP_SIZE+1];
- int nheap;
-} pts_heap_t;
-
-struct hb_work_private_s
-{
- hb_job_t *job;
- hb_title_t *title;
- AVCodecContext *context;
- AVCodecParserContext *parser;
- int threads;
- int video_codec_opened;
- hb_list_t *list;
- double duration; // frame duration (for video)
- double field_duration; // field duration (for video)
- int frame_duration_set; // Indicates valid timing was found in stream
- double pts_next; // next pts we expect to generate
- int64_t chap_time; // time of next chap mark (if new_chap != 0)
- int new_chap; // output chapter mark pending
- uint32_t nframes;
- uint32_t ndrops;
- uint32_t decode_errors;
- int brokenByMicrosoft; // video stream may contain packed b-frames
- hb_buffer_t* delayq[HEAP_SIZE];
- int queue_primed;
- pts_heap_t pts_heap;
- void* buffer;
- struct SwsContext *sws_context; // if we have to rescale or convert color space
- int sws_width;
- int sws_height;
- int sws_pix_fmt;
- int cadence[12];
- int wait_for_keyframe;
- hb_va_dxva2_t * dxva2;
- uint8_t *dst_frame;
- hb_oclscale_t *os;
- hb_audio_resample_t *resample;
-};
-
-static hb_buffer_t *link_buf_list( hb_work_private_t *pv );
-
-
-static int64_t heap_pop( pts_heap_t *heap )
-{
- int64_t result;
-
- if( heap->nheap <= 0 )
- {
- return -1;
- }
-
- // return the top of the heap then put the bottom element on top,
- // decrease the heap size by one & rebalence the heap.
- result = heap->h[1];
-
- int64_t v = heap->h[heap->nheap--];
- int parent = 1;
- int child = parent << 1;
- while( child <= heap->nheap )
- {
- // find the smallest of the two children of parent
- if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
- ++child;
-
- if (v <= heap->h[child])
- // new item is smaller than either child so it's the new parent.
- break;
-
- // smallest child is smaller than new item so move it up then
- // check its children.
- int64_t hp = heap->h[child];
- heap->h[parent] = hp;
- parent = child;
- child = parent << 1;
- }
- heap->h[parent] = v;
- return result;
-}
-
-static void heap_push( pts_heap_t *heap, int64_t v )
-{
- if ( heap->nheap < HEAP_SIZE )
- {
- ++heap->nheap;
- }
-
- // stick the new value on the bottom of the heap then bubble it
- // up to its correct spot.
- int child = heap->nheap;
- while (child > 1) {
- int parent = child >> 1;
- if (heap->h[parent] <= v)
- break;
- // move parent down
- int64_t hp = heap->h[parent];
- heap->h[child] = hp;
- child = parent;
- }
- heap->h[child] = v;
-}
-
-/***********************************************************************
- * Close
- ***********************************************************************
- *
- **********************************************************************/
-static void closePrivData( hb_work_private_t ** ppv )
-{
- hb_work_private_t * pv = *ppv;
-
- if ( pv )
- {
- flushDelayQueue( pv );
-
- if ( pv->job && pv->context && pv->context->codec )
- {
- hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
- pv->context->codec->name, pv->nframes, pv->decode_errors,
- pv->ndrops );
- }
- if ( pv->sws_context )
- {
- sws_freeContext( pv->sws_context );
- }
- if ( pv->parser )
- {
- av_parser_close(pv->parser);
- }
- if ( pv->context && pv->context->codec )
- {
- hb_avcodec_close( pv->context );
- }
- if ( pv->context )
- {
- av_freep( &pv->context->extradata );
- av_free( pv->context );
- }
- if ( pv->list )
- {
- hb_list_empty( &pv->list );
- }
-
- hb_audio_resample_free( pv->resample );
- if ( pv->os )
- {
-#ifdef USE_OPENCL
- CL_FREE( pv->os->h_in_buf );
- CL_FREE( pv->os->h_out_buf );
- CL_FREE( pv->os->v_out_buf );
- CL_FREE( pv->os->h_coeff_y );
- CL_FREE( pv->os->h_coeff_uv );
- CL_FREE( pv->os->h_index_y );
- CL_FREE( pv->os->h_index_uv );
- CL_FREE( pv->os->v_coeff_y );
- CL_FREE( pv->os->v_coeff_uv );
- CL_FREE( pv->os->v_index_y );
- CL_FREE( pv->os->v_index_uv );
-#endif
- free( pv->os );
- }
- if ( pv->dxva2 )
- {
-
-#ifdef USE_OPENCL
- CL_FREE( pv->dxva2->cl_mem_nv12 );
-#endif
- hb_va_close( pv->dxva2 );
- }
- free( pv );
- }
- *ppv = NULL;
-}
-
-/* -------------------------------------------------------------
- * General purpose video decoder using libavcodec
- */
-
-static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
- int h )
-{
- if ( dstride == sstride )
- {
- memcpy( dst, src, dstride * h );
- return dst + dstride * h;
- }
- int lbytes = dstride <= sstride? dstride : sstride;
- while ( --h >= 0 )
- {
- memcpy( dst, src, lbytes );
- src += sstride;
- dst += dstride;
- }
- return dst;
-}
-
-// copy one video frame into an HB buf. If the frame isn't in our color space
-// or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
-// Otherwise just copy the bits.
-static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
-{
- AVCodecContext *context = pv->context;
- int w, h;
- if ( ! pv->job )
- {
- // HandBrake's video pipeline uses yuv420 color. This means all
- // dimensions must be even. So we must adjust the dimensions
- // of incoming video if not even.
- w = context->width & ~1;
- h = context->height & ~1;
- }
- else
- {
- w = pv->job->title->width;
- h = pv->job->title->height;
- }
- if( pv->dxva2 && pv->job )
- {
- hb_buffer_t *buf;
- int ww, hh;
- if( (w > pv->job->width || h > pv->job->height) && (hb_get_gui_info(&hb_gui, 2) == 1) )
- {
- buf = hb_video_buffer_init( pv->job->width, pv->job->height );
- ww = pv->job->width;
- hh = pv->job->height;
- }
- else
- {
- buf = hb_video_buffer_init( w, h );
- ww = w;
- hh = h;
- }
- if( !pv->dst_frame )
- {
- pv->dst_frame = malloc( ww * hh * 3 / 2 );
- }
- if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR )
- {
- hb_log( "hb_va_Extract failed!!!!!!" );
- }
-
- w = buf->plane[0].stride;
- h = buf->plane[0].height;
- uint8_t *dst = buf->plane[0].data;
- copy_plane( dst, pv->dst_frame, w, ww, h );
- w = buf->plane[1].stride;
- h = buf->plane[1].height;
- dst = buf->plane[1].data;
- copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h );
- w = buf->plane[2].stride;
- h = buf->plane[2].height;
- dst = buf->plane[2].data;
- copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h );
- return buf;
- }
- else
- {
- hb_buffer_t *buf = hb_video_buffer_init( w, h );
- uint8_t *dst = buf->data;
- if( context->pix_fmt != AV_PIX_FMT_YUV420P || w != context->width ||
- h != context->height )
- {
- // have to convert to our internal color space and/or rescale
- AVPicture dstpic;
- hb_avpicture_fill( &dstpic, buf );
- if( !pv->sws_context ||
- pv->sws_width != context->width ||
- pv->sws_height != context->height ||
- pv->sws_pix_fmt != context->pix_fmt )
- {
- if( pv->sws_context )
- sws_freeContext( pv->sws_context );
- pv->sws_context = hb_sws_get_context(
- context->width, context->height, context->pix_fmt,
- w, h, PIX_FMT_YUV420P,
- SWS_LANCZOS|SWS_ACCURATE_RND );
- pv->sws_width = context->width;
- pv->sws_height = context->height;
- pv->sws_pix_fmt = context->pix_fmt;
- }
- sws_scale( pv->sws_context, (const uint8_t*const*)frame->data,
- frame->linesize, 0, context->height,
- dstpic.data, dstpic.linesize );
- }
- else
- {
- w = buf->plane[0].stride;
- h = buf->plane[0].height;
- dst = buf->plane[0].data;
- copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
- w = buf->plane[1].stride;
- h = buf->plane[1].height;
- dst = buf->plane[1].data;
- copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
- w = buf->plane[2].stride;
- h = buf->plane[2].height;
- dst = buf->plane[2].data;
- copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
- }
- return buf;
- }
-
-}
-
-
-static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
-{
- int result = HB_WORK_ERROR;
- hb_work_private_t *pv = (hb_work_private_t*)context->opaque;
- if( pv->dxva2 )
- {
- result = hb_va_get_frame_buf( pv->dxva2, context, frame );
- }
- if( result==HB_WORK_ERROR )
- return avcodec_default_get_buffer( context, frame );
- return 0;
-}
-
-static int reget_frame_buf( AVCodecContext *context, AVFrame *frame )
-{
- return avcodec_default_reget_buffer( context, frame );
-}
-
-static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
-{
- hb_chapter_t *c;
-
- if ( !pv->job )
- return;
-
- c = hb_list_item( pv->job->list_chapter, chap_num - 1 );
- if ( c && c->title )
- {
- hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
- pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
- }
- else
- {
- hb_log( "%s: Chapter %d at frame %u time %"PRId64,
- pv->context->codec->name, chap_num, pv->nframes, pts );
- }
-}
-
-static void flushDelayQueue( hb_work_private_t *pv )
-{
- hb_buffer_t *buf;
- int slot = pv->queue_primed ? pv->nframes & (HEAP_SIZE-1) : 0;
-
- // flush all the video packets left on our timestamp-reordering delay q
- while ( ( buf = pv->delayq[slot] ) != NULL )
- {
- buf->s.start = heap_pop( &pv->pts_heap );
- hb_list_add( pv->list, buf );
- pv->delayq[slot] = NULL;
- slot = ( slot + 1 ) & (HEAP_SIZE-1);
- }
-}
-
-#define TOP_FIRST PIC_FLAG_TOP_FIELD_FIRST
-#define PROGRESSIVE PIC_FLAG_PROGRESSIVE_FRAME
-#define REPEAT_FIRST PIC_FLAG_REPEAT_FIRST_FIELD
-#define TB 8
-#define BT 16
-#define BT_PROG 32
-#define BTB_PROG 64
-#define TB_PROG 128
-#define TBT_PROG 256
-
-static void checkCadence( int * cadence, uint16_t flags, int64_t start )
-{
- /* Rotate the cadence tracking. */
- int i = 0;
- for(i=11; i > 0; i--)
- {
- cadence[i] = cadence[i-1];
- }
-
- if ( !(flags & PROGRESSIVE) && !(flags & TOP_FIRST) )
- {
- /* Not progressive, not top first...
- That means it's probably bottom
- first, 2 fields displayed.
- */
- //hb_log("MPEG2 Flag: Bottom field first, 2 fields displayed.");
- cadence[0] = BT;
- }
- else if ( !(flags & PROGRESSIVE) && (flags & TOP_FIRST) )
- {
- /* Not progressive, top is first,
- Two fields displayed.
- */
- //hb_log("MPEG2 Flag: Top field first, 2 fields displayed.");
- cadence[0] = TB;
- }
- else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) )
- {
- /* Progressive, but noting else.
- That means Bottom first,
- 2 fields displayed.
- */
- //hb_log("MPEG2 Flag: Progressive. Bottom field first, 2 fields displayed.");
- cadence[0] = BT_PROG;
- }
- else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) )
- {
- /* Progressive, and repeat. .
- That means Bottom first,
- 3 fields displayed.
- */
- //hb_log("MPEG2 Flag: Progressive repeat. Bottom field first, 3 fields displayed.");
- cadence[0] = BTB_PROG;
- }
- else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) )
- {
- /* Progressive, top first.
- That means top first,
- 2 fields displayed.
- */
- //hb_log("MPEG2 Flag: Progressive. Top field first, 2 fields displayed.");
- cadence[0] = TB_PROG;
- }
- else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) )
- {
- /* Progressive, top, repeat.
- That means top first,
- 3 fields displayed.
- */
- //hb_log("MPEG2 Flag: Progressive repeat. Top field first, 3 fields displayed.");
- cadence[0] = TBT_PROG;
- }
-
- if ( (cadence[2] <= TB) && (cadence[1] <= TB) && (cadence[0] > TB) && (cadence[11]) )
- hb_log("%fs: Video -> Film", (float)start / 90000);
- if ( (cadence[2] > TB) && (cadence[1] <= TB) && (cadence[0] <= TB) && (cadence[11]) )
- hb_log("%fs: Film -> Video", (float)start / 90000);
-}
-
-/*
- * Decodes a video frame from the specified raw packet data
- * ('data', 'size', 'sequence').
- * The output of this function is stored in 'pv->list', which contains a list
- * of zero or more decoded packets.
- *
- * The returned packets are guaranteed to have their timestamps in the correct
- * order, even if the original packets decoded by libavcodec have misordered
- * timestamps, due to the use of 'packed B-frames'.
- *
- * Internally the set of decoded packets may be buffered in 'pv->delayq'
- * until enough packets have been decoded so that the timestamps can be
- * correctly rewritten, if this is necessary.
- */
-static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype )
-{
- hb_work_private_t *pv = w->private_data;
- int got_picture, oldlevel = 0;
- AVFrame frame = { { 0 } };
- AVPacket avp;
-
- if ( global_verbosity_level <= 1 )
- {
- oldlevel = av_log_get_level();
- av_log_set_level( AV_LOG_QUIET );
- }
-
- av_init_packet(&avp);
- avp.data = data;
- avp.size = size;
- avp.pts = pts;
- avp.dts = dts;
- /*
- * libav avcodec_decode_video2() needs AVPacket flagged with AV_PKT_FLAG_KEY
- * for some codecs. For example, sequence of PNG in a mov container.
- */
- if ( frametype & HB_FRAME_KEY )
- {
- avp.flags |= AV_PKT_FLAG_KEY;
- }
-
- if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
- {
- ++pv->decode_errors;
- }
- if ( global_verbosity_level <= 1 )
- {
- av_log_set_level( oldlevel );
- }
- if( got_picture && pv->wait_for_keyframe > 0 )
- {
- // Libav is inconsistant about how it flags keyframes. For many
- // codecs it simply sets frame.key_frame. But for others, it only
- // sets frame.pict_type. And for yet others neither gets set at all
- // (qtrle).
- int key = frame.key_frame || (w->codec_param != AV_CODEC_ID_H264 &&
- (frame.pict_type == 0 ||
- frame.pict_type == AV_PICTURE_TYPE_I));
- if( !key )
- {
- pv->wait_for_keyframe--;
- return 0;
- }
- pv->wait_for_keyframe = 0;
- }
- if( got_picture )
- {
- uint16_t flags = 0;
-
- // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
- // packet had a pts we handed it to av_parser_parse (if the packet had
- // no pts we set it to AV_NOPTS_VALUE, but before the parse we can't
- // distinguish between the start of a video frame with no pts & an
- // intermediate packet of some frame which never has a pts). we hope
- // that when parse returns the frame to us the pts we originally
- // handed it will be in parser->pts. we put this pts into avp.pts so
- // that when avcodec_decode_video finally gets around to allocating an
- // AVFrame to hold the decoded frame, avcodec_default_get_buffer can
- // stuff that pts into the it. if all of these relays worked at this
- // point frame.pts should hold the frame's pts from the original data
- // stream or AV_NOPTS_VALUE if it didn't have one. in the latter case
- // we generate the next pts in sequence for it.
- if ( !pv->frame_duration_set )
- compute_frame_duration( pv );
-
- double frame_dur = pv->duration;
- if ( frame.repeat_pict )
- {
- frame_dur += frame.repeat_pict * pv->field_duration;
- }
-
-
- if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
- {
- if( avp.pts>0 )
- {
- if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 )
- frame.pkt_pts = pv->dxva2->input_pts[0];
- else
- frame.pkt_pts = pv->dxva2->input_pts[0]<pv->dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1];
- }
- }
- // If there was no pts for this frame, assume constant frame rate
- // video & estimate the next frame time from the last & duration.
- double pts;
- if (frame.pkt_pts == AV_NOPTS_VALUE)
- {
- pts = pv->pts_next;
- }
- else
- {
- pts = frame.pkt_pts;
- }
- pv->pts_next = pts + frame_dur;
-
- if ( frame.top_field_first )
- {
- flags |= PIC_FLAG_TOP_FIELD_FIRST;
- }
- if ( !frame.interlaced_frame )
- {
- flags |= PIC_FLAG_PROGRESSIVE_FRAME;
- }
- if ( frame.repeat_pict == 1 )
- {
- flags |= PIC_FLAG_REPEAT_FIRST_FIELD;
- }
- if ( frame.repeat_pict == 2 )
- {
- flags |= PIC_FLAG_REPEAT_FRAME;
- }
-
- hb_buffer_t *buf;
-
- // if we're doing a scan or this content couldn't have been broken
- // by Microsoft we don't worry about timestamp reordering
- if ( ! pv->job || ! pv->brokenByMicrosoft )
- {
- buf = copy_frame( pv, &frame );
- buf->s.start = pts;
- buf->sequence = sequence;
-
- buf->s.flags = flags;
-
- if ( pv->new_chap && buf->s.start >= pv->chap_time )
- {
- buf->s.new_chap = pv->new_chap;
- log_chapter( pv, pv->new_chap, buf->s.start );
- pv->new_chap = 0;
- pv->chap_time = 0;
- }
- else if ( pv->nframes == 0 && pv->job )
- {
- log_chapter( pv, pv->job->chapter_start, buf->s.start );
- }
- checkCadence( pv->cadence, flags, buf->s.start );
- hb_list_add( pv->list, buf );
- ++pv->nframes;
- return got_picture;
- }
-
- // XXX This following probably addresses a libavcodec bug but I don't
- // see an easy fix so we workaround it here.
- //
- // The M$ 'packed B-frames' atrocity results in decoded frames with
- // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
- // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
- // The frames are actually delivered in the right order but with
- // the wrong timestamp. To get the correct timestamp attached to
- // each frame we have a delay queue (longer than the max number of
- // b-frames) & a sorting heap for the timestamps. As each frame
- // comes out of the decoder the oldest frame in the queue is removed
- // and associated with the smallest timestamp. Then the new frame is
- // added to the queue & its timestamp is pushed on the heap.
- // This does nothing if the timestamps are correct (i.e., the video
- // uses a codec that Micro$oft hasn't broken yet) but the frames
- // get timestamped correctly even when M$ has munged them.
-
- // remove the oldest picture from the frame queue (if any) &
- // give it the smallest timestamp from our heap. The queue size
- // is a power of two so we get the slot of the oldest by masking
- // the frame count & this will become the slot of the newest
- // once we've removed & processed the oldest.
- int slot = pv->nframes & (HEAP_SIZE-1);
- if ( ( buf = pv->delayq[slot] ) != NULL )
- {
- pv->queue_primed = 1;
- buf->s.start = heap_pop( &pv->pts_heap );
-
- if ( pv->new_chap && buf->s.start >= pv->chap_time )
- {
- buf->s.new_chap = pv->new_chap;
- log_chapter( pv, pv->new_chap, buf->s.start );
- pv->new_chap = 0;
- pv->chap_time = 0;
- }
- else if ( pv->nframes == 0 && pv->job )
- {
- log_chapter( pv, pv->job->chapter_start, buf->s.start );
- }
- checkCadence( pv->cadence, buf->s.flags, buf->s.start );
- hb_list_add( pv->list, buf );
- }
-
- // add the new frame to the delayq & push its timestamp on the heap
- buf = copy_frame( pv, &frame );
- buf->sequence = sequence;
- /* Store picture flags for later use by filters */
- buf->s.flags = flags;
- pv->delayq[slot] = buf;
- heap_push( &pv->pts_heap, pts );
-
- ++pv->nframes;
- }
-
- return got_picture;
-}
-static void decodeVideo( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype )
-{
- hb_work_private_t *pv = w->private_data;
-
- /*
- * The following loop is a do..while because we need to handle both
- * data & the flush at the end (signaled by size=0). At the end there's
- * generally a frame in the parser & one or more frames in the decoder
- * (depending on the bframes setting).
- */
- int pos = 0;
- do {
- uint8_t *pout;
- int pout_len, len;
- int64_t parser_pts, parser_dts;
- if ( pv->parser )
- {
- len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
- data + pos, size - pos, pts, dts, 0 );
- parser_pts = pv->parser->pts;
- parser_dts = pv->parser->dts;
- }
- else
- {
- pout = data;
- len = pout_len = size;
- parser_pts = pts;
- parser_dts = dts;
- }
- pos += len;
-
- if ( pout_len > 0 )
- {
- decodeFrame( w, pout, pout_len, sequence, parser_pts, parser_dts, frametype );
- }
- } while ( pos < size );
-
- /* the stuff above flushed the parser, now flush the decoder */
- if ( size <= 0 )
- {
- while ( decodeFrame( w, NULL, 0, sequence, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0 ) )
- {
- }
- flushDelayQueue( pv );
- }
-}
-
-/*
- * Removes all packets from 'pv->list', links them together into
- * a linked-list, and returns the first packet in the list.
- */
-static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
-{
- hb_buffer_t *head = hb_list_item( pv->list, 0 );
-
- if ( head )
- {
- hb_list_rem( pv->list, head );
-
- hb_buffer_t *last = head, *buf;
-
- while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
- {
- hb_list_rem( pv->list, buf );
- last->next = buf;
- last = buf;
- }
- }
- return head;
-}
-static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame )
-{
- hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque;
- int i;
- if( p_dec->dxva2 )
- {
- hb_va_release( p_dec->dxva2, frame );
- }
- else if( !frame->opaque )
- {
- if( frame->type == FF_BUFFER_TYPE_INTERNAL )
- avcodec_default_release_buffer( p_context, frame );
- }
- for( i = 0; i < 4; i++ )
- frame->data[i] = NULL;
-}
-
-static void init_video_avcodec_context( hb_work_private_t *pv )
-{
- /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
- pv->context->opaque = pv;
- pv->context->get_buffer = get_frame_buf;
- pv->context->reget_buffer = reget_frame_buf;
- if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
- pv->context->release_buffer = hb_ffmpeg_release_frame_buf;
-}
-
-static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
-{
-
- hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
-
- w->private_data = pv;
- pv->wait_for_keyframe = 60;
- pv->job = job;
- if ( job )
- pv->title = job->title;
- else
- pv->title = w->title;
- pv->list = hb_list_init();
-
- if( pv->job && pv->job->title && !pv->job->title->has_resolution_change )
- {
- pv->threads = HB_FFMPEG_THREADS_AUTO;
- }
- if ( pv->title->opaque_priv )
- {
- AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv;
- AVCodec *codec = avcodec_find_decoder( w->codec_param );
- if ( codec == NULL )
- {
- hb_log( "decavcodecvInit: failed to find codec for id (%d)", w->codec_param );
- return 1;
- }
- pv->context = avcodec_alloc_context3(codec);
- avcodec_copy_context( pv->context, ic->streams[pv->title->video_id]->codec);
- pv->context->workaround_bugs = FF_BUG_AUTODETECT;
- pv->context->err_recognition = AV_EF_CRCCHECK;
- pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK;
- if( ((w->codec_param==AV_CODEC_ID_H264)
- || (w->codec_param==AV_CODEC_ID_MPEG2VIDEO)
- || (w->codec_param==AV_CODEC_ID_VC1)
- || (w->codec_param==AV_CODEC_ID_WMV3)
- || (w->codec_param==AV_CODEC_ID_MPEG4))
- && pv->job )
- {
- pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param );
- if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
- {
- hb_va_new_dxva2( pv->dxva2, pv->context );
- init_video_avcodec_context( pv );
- pv->context->get_format = hb_ffmpeg_get_format;
- pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
- memset( pv->os, 0, sizeof( hb_oclscale_t ) );
- pv->threads = 1;
-
- }
- }
- if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) )
- {
- hb_log( "decavcodecvInit: avcodec_open failed" );
- return 1;
- }
- pv->video_codec_opened = 1;
- // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
- // b-frames abortion that messes up frame ordering and timestamps.
- // XXX ffmpeg knows which streams are broken but doesn't expose the
- // info externally. We should patch ffmpeg to add a flag to the
- // codec context for this but until then we mark all ffmpeg streams
- // as suspicious.
- pv->brokenByMicrosoft = 1;
- }
- else
- {
- AVCodec *codec = avcodec_find_decoder( w->codec_param );
- pv->parser = av_parser_init( w->codec_param );
- pv->context = avcodec_alloc_context3( codec );
- pv->context->workaround_bugs = FF_BUG_AUTODETECT;
- pv->context->err_recognition = AV_EF_CRCCHECK;
- pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK;
- init_video_avcodec_context( pv );
- }
- return 0;
-}
-
-static int next_hdr( hb_buffer_t *in, int offset )
-{
- uint8_t *dat = in->data;
- uint16_t last2 = 0xffff;
- for ( ; in->size - offset > 1; ++offset )
- {
- if ( last2 == 0 && dat[offset] == 0x01 )
- // found an mpeg start code
- return offset - 2;
-
- last2 = ( last2 << 8 ) | dat[offset];
- }
-
- return -1;
-}
-
-static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
-{
- if ( in->size - offset < 4 )
- // not enough room for an mpeg start code
- return -1;
-
- for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
- {
- if ( in->data[offset+3] == hdr_type )
- // found it
- break;
- }
- return offset;
-}
-
-static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
-{
- hb_work_private_t *pv = w->private_data;
-
- // we can't call the avstream funcs but the read_header func in the
- // AVInputFormat may set up some state in the AVContext. In particular
- // vc1t_read_header allocates 'extradata' to deal with header issues
- // related to Microsoft's bizarre engineering notions. We alloc a chunk
- // of space to make vc1 work then associate the codec with the context.
- if ( w->codec_param != AV_CODEC_ID_VC1 )
- {
- // we haven't been inflicted with M$ - allocate a little space as
- // a marker and return success.
- pv->context->extradata_size = 0;
- // av_malloc uses posix_memalign which is allowed to
- // return NULL when allocating 0 bytes. We use extradata == NULL
- // to trigger initialization of extradata and the decoder, so
- // we can not set it to NULL here. So allocate a small
- // buffer instead.
- pv->context->extradata = av_malloc(1);
- return 0;
- }
-
- // find the start and and of the sequence header
- int shdr, shdr_end;
- if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
- {
- // didn't find start of seq hdr
- return 1;
- }
- if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
- {
- shdr_end = in->size;
- }
- shdr_end -= shdr;
-
- // find the start and and of the entry point header
- int ehdr, ehdr_end;
- if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
- {
- // didn't find start of entry point hdr
- return 1;
- }
- if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
- {
- ehdr_end = in->size;
- }
- ehdr_end -= ehdr;
-
- // found both headers - allocate an extradata big enough to hold both
- // then copy them into it.
- pv->context->extradata_size = shdr_end + ehdr_end;
- pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
- memcpy( pv->context->extradata, in->data + shdr, shdr_end );
- memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
- memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
- return 0;
-}
-
-static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
- hb_buffer_t ** buf_out )
-{
- hb_work_private_t *pv = w->private_data;
- hb_buffer_t *in = *buf_in;
- int64_t pts = AV_NOPTS_VALUE;
- int64_t dts = pts;
-
- *buf_in = NULL;
- *buf_out = NULL;
-
- /* if we got an empty buffer signaling end-of-stream send it downstream */
- if ( in->size == 0 )
- {
- if ( pv->context->codec != NULL )
- {
- decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype );
- }
- hb_list_add( pv->list, in );
- *buf_out = link_buf_list( pv );
- return HB_WORK_DONE;
- }
-
- // if this is the first frame open the codec (we have to wait for the
- // first frame because of M$ VC1 braindamage).
- if ( !pv->video_codec_opened )
- {
- AVCodec *codec = avcodec_find_decoder( w->codec_param );
- if ( codec == NULL )
- {
- hb_log( "decavcodecvWork: failed to find codec for id (%d)", w->codec_param );
- *buf_out = hb_buffer_init( 0 );;
- return HB_WORK_DONE;
- }
- // Note that there is currently a small memory leak in libav at this
- // point. pv->context->priv_data gets allocated by
- // avcodec_alloc_context3(), then avcodec_get_context_defaults3()
- // memsets the context and looses the pointer.
- //
- // avcodec_get_context_defaults3() looks as if they intended for
- // it to preserve any existing priv_data because they test the pointer
- // before allocating new memory, but the memset has already cleared it.
- avcodec_get_context_defaults3( pv->context, codec );
- init_video_avcodec_context( pv );
- if ( setup_extradata( w, in ) )
- {
- // we didn't find the headers needed to set up extradata.
- // the codec will abort if we open it so just free the buf
- // and hope we eventually get the info we need.
- hb_buffer_close( &in );
- return HB_WORK_OK;
- }
- // disable threaded decoding for scan, can cause crashes
- if ( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) )
- {
- hb_log( "decavcodecvWork: avcodec_open failed" );
- *buf_out = hb_buffer_init( 0 );;
- return HB_WORK_DONE;
- }
- pv->video_codec_opened = 1;
- }
-
- if( in->s.start >= 0 )
- {
- pts = in->s.start;
- dts = in->s.renderOffset;
- }
- if ( in->s.new_chap )
- {
- pv->new_chap = in->s.new_chap;
- pv->chap_time = pts >= 0? pts : pv->pts_next;
- }
- if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK )
- {
- if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] )
- pv->dxva2->input_pts[0] = pts;
- else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] )
- pv->dxva2->input_pts[1] = pts;
- pv->dxva2->input_dts = dts;
- }
- decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype );
- hb_buffer_close( &in );
- *buf_out = link_buf_list( pv );
- return HB_WORK_OK;
-}
-
-static void compute_frame_duration( hb_work_private_t *pv )
-{
- double duration = 0.;
- int64_t max_fps = 64L;
-
- // context->time_base may be in fields, so set the max *fields* per second
- if ( pv->context->ticks_per_frame > 1 )
- max_fps *= pv->context->ticks_per_frame;
-
- if ( pv->title->opaque_priv )
- {
- // If ffmpeg is demuxing for us, it collects some additional
- // information about framerates that is often more accurate
- // than context->time_base.
- AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv;
- AVStream *st = ic->streams[pv->title->video_id];
- if ( st->nb_frames && st->duration )
- {
- // compute the average frame duration from the total number
- // of frames & the total duration.
- duration = ( (double)st->duration * (double)st->time_base.num ) /
- ( (double)st->nb_frames * (double)st->time_base.den );
- }
- else
- {
- // XXX We don't have a frame count or duration so try to use the
- // far less reliable time base info in the stream.
- // Because the time bases are so screwed up, we only take values
- // in the range 8fps - 64fps.
- AVRational *tb = NULL;
- if ( st->avg_frame_rate.den * 64L > st->avg_frame_rate.num &&
- st->avg_frame_rate.num > st->avg_frame_rate.den * 8L )
- {
- tb = &(st->avg_frame_rate);
- duration = (double)tb->den / (double)tb->num;
- }
- else if ( st->time_base.num * 64L > st->time_base.den &&
- st->time_base.den > st->time_base.num * 8L )
- {
- tb = &(st->time_base);
- duration = (double)tb->num / (double)tb->den;
- }
- else if ( st->r_frame_rate.den * 64L > st->r_frame_rate.num &&
- st->r_frame_rate.num > st->r_frame_rate.den * 8L )
- {
- tb = &(st->r_frame_rate);
- duration = (double)tb->den / (double)tb->num;
- }
- }
- if ( !duration &&
- pv->context->time_base.num * max_fps > pv->context->time_base.den &&
- pv->context->time_base.den > pv->context->time_base.num * 8L )
- {
- duration = (double)pv->context->time_base.num /
- (double)pv->context->time_base.den;
- if ( pv->context->ticks_per_frame > 1 )
- {
- // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
- // field rate rather than frame rate so convert back to frames.
- duration *= pv->context->ticks_per_frame;
- }
- }
- }
- else
- {
- if ( pv->context->time_base.num * max_fps > pv->context->time_base.den &&
- pv->context->time_base.den > pv->context->time_base.num * 8L )
- {
- duration = (double)pv->context->time_base.num /
- (double)pv->context->time_base.den;
- if ( pv->context->ticks_per_frame > 1 )
- {
- // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
- // field rate rather than frame rate so convert back to frames.
- duration *= pv->context->ticks_per_frame;
- }
- }
- }
- if ( duration == 0 )
- {
- // No valid timing info found in the stream, so pick some value
- duration = 1001. / 24000.;
- }
- else
- {
- pv->frame_duration_set = 1;
- }
- pv->duration = duration * 90000.;
- pv->field_duration = pv->duration;
- if ( pv->context->ticks_per_frame > 1 )
- {
- pv->field_duration /= pv->context->ticks_per_frame;
- }
-}
-
-static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
-{
- hb_work_private_t *pv = w->private_data;
-
- memset( info, 0, sizeof(*info) );
-
- info->bitrate = pv->context->bit_rate;
- // HandBrake's video pipeline uses yuv420 color. This means all
- // dimensions must be even. So we must adjust the dimensions
- // of incoming video if not even.
- info->width = pv->context->width & ~1;
- info->height = pv->context->height & ~1;
-
- info->pixel_aspect_width = pv->context->sample_aspect_ratio.num;
- info->pixel_aspect_height = pv->context->sample_aspect_ratio.den;
-
- compute_frame_duration( pv );
- info->rate = 27000000;
- info->rate_base = pv->duration * 300.;
-
- info->profile = pv->context->profile;
- info->level = pv->context->level;
- info->name = pv->context->codec->name;
-
- switch( pv->context->color_primaries )
- {
- case AVCOL_PRI_BT709:
- info->color_prim = HB_COLR_PRI_BT709;
- break;
- case AVCOL_PRI_BT470BG:
- info->color_prim = HB_COLR_PRI_EBUTECH;
- break;
- case AVCOL_PRI_BT470M:
- case AVCOL_PRI_SMPTE170M:
- case AVCOL_PRI_SMPTE240M:
- info->color_prim = HB_COLR_PRI_SMPTEC;
- break;
- default:
- {
- if( ( info->width >= 1280 || info->height >= 720 ) ||
- ( info->width > 720 && info->height > 576 ) )
- // ITU BT.709 HD content
- info->color_prim = HB_COLR_PRI_BT709;
- else if( info->rate_base == 1080000 )
- // ITU BT.601 DVD or SD TV content (PAL)
- info->color_prim = HB_COLR_PRI_EBUTECH;
- else
- // ITU BT.601 DVD or SD TV content (NTSC)
- info->color_prim = HB_COLR_PRI_SMPTEC;
- break;
- }
- }
-
- switch( pv->context->color_trc )
- {
- case AVCOL_TRC_SMPTE240M:
- info->color_transfer = HB_COLR_TRA_SMPTE240M;
- break;
- default:
- // ITU BT.601, BT.709, anything else
- info->color_transfer = HB_COLR_TRA_BT709;
- break;
- }
-
- switch( pv->context->colorspace )
- {
- case AVCOL_SPC_BT709:
- info->color_matrix = HB_COLR_MAT_BT709;
- break;
- case AVCOL_SPC_FCC:
- case AVCOL_SPC_BT470BG:
- case AVCOL_SPC_SMPTE170M:
- case AVCOL_SPC_RGB: // libswscale rgb2yuv
- info->color_matrix = HB_COLR_MAT_SMPTE170M;
- break;
- case AVCOL_SPC_SMPTE240M:
- info->color_matrix = HB_COLR_MAT_SMPTE240M;
- break;
- default:
- {
- if( ( info->width >= 1280 || info->height >= 720 ) ||
- ( info->width > 720 && info->height > 576 ) )
- // ITU BT.709 HD content
- info->color_matrix = HB_COLR_MAT_BT709;
- else
- // ITU BT.601 DVD or SD TV content (PAL)
- // ITU BT.601 DVD or SD TV content (NTSC)
- info->color_matrix = HB_COLR_MAT_SMPTE170M;
- break;
- }
- }
-
- return 1;
-}
-
-static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
- hb_work_info_t *info )
-{
- return 0;
-}
-
-static void decavcodecvFlush( hb_work_object_t *w )
-{
- hb_work_private_t *pv = w->private_data;
-
- if ( pv->context->codec )
- {
- flushDelayQueue( pv );
- hb_buffer_t *buf = link_buf_list( pv );
- hb_buffer_close( &buf );
- if ( pv->title->opaque_priv == NULL )
- {
- pv->video_codec_opened = 0;
- hb_avcodec_close( pv->context );
- av_freep( &pv->context->extradata );
- if ( pv->parser )
- {
- av_parser_close(pv->parser);
- }
- pv->parser = av_parser_init( w->codec_param );
- }
- else
- {
- avcodec_flush_buffers( pv->context );
- }
- }
- pv->wait_for_keyframe = 60;
-}
-
-static void decavcodecClose( hb_work_object_t * w )
-{
- hb_work_private_t * pv = w->private_data;
- if( pv->dst_frame ) free( pv->dst_frame );
- if( pv )
- {
- closePrivData( &pv );
- w->private_data = NULL;
- }
-}
-
-hb_work_object_t hb_decavcodecv_accl =
-{
- .id = WORK_DECAVCODECVACCL,
- .name = "Video hardware decoder (libavcodec)",
- .init = decavcodecvInit,
- .work = decavcodecvWork,
- .close = decavcodecClose,
- .flush = decavcodecvFlush,
- .info = decavcodecvInfo,
- .bsinfo = decavcodecvBSInfo
-};
-
-#endif
diff --git a/libhb/dxva2api.c b/libhb/dxva2api.c
index 04011c0c5..004cd681a 100644
--- a/libhb/dxva2api.c
+++ b/libhb/dxva2api.c
@@ -10,6 +10,7 @@
Li Cao <[email protected]> <http://www.multicorewareinc.com/>
*/
+#ifdef USE_HWD
#include "dxva2api.h"
__inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ )
@@ -34,3 +35,4 @@ __inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ )
_fixed_.Value = HIWORD( _float_ * 0x10000 );
return _fixed_;
}
+#endif
diff --git a/libhb/dxva2api.h b/libhb/dxva2api.h
index dc5909477..a64d700fb 100644
--- a/libhb/dxva2api.h
+++ b/libhb/dxva2api.h
@@ -14,7 +14,7 @@
#ifndef _DXVA2API_H
#define _DXVA2API_H
-
+#ifdef USE_HWD
#define MINGW_DXVA2API_H_VERSION (2)
#if __GNUC__ >=3
@@ -818,5 +818,5 @@ __inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ );
__inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha();
__inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ );
-
+#endif
#endif //_DXVA2API_H
diff --git a/libhb/hb.c b/libhb/hb.c
index 63c6c6931..c1188caa2 100644
--- a/libhb/hb.c
+++ b/libhb/hb.c
@@ -484,7 +484,9 @@ hb_handle_t * hb_init( int verbose, int update_check )
#endif
hb_register( &hb_encavcodeca );
hb_register( &hb_reader );
+#ifdef USE_HWD
hb_register( &hb_decavcodecv_accl );
+#endif
return h;
}
@@ -583,7 +585,9 @@ hb_handle_t * hb_init_dl( int verbose, int update_check )
#endif
hb_register( &hb_encavcodeca );
hb_register( &hb_reader );
+#ifdef USE_HWD
hb_register( &hb_decavcodecv_accl );
+#endif
return h;
}
diff --git a/libhb/internal.h b/libhb/internal.h
index 9262ff6f8..79ed86f89 100644
--- a/libhb/internal.h
+++ b/libhb/internal.h
@@ -428,7 +428,6 @@ extern hb_filter_object_t hb_filter_denoise;
extern hb_filter_object_t hb_filter_decomb;
extern hb_filter_object_t hb_filter_rotate;
extern hb_filter_object_t hb_filter_crop_scale;
-extern hb_filter_object_t hb_filter_crop_scale_accl;
extern hb_filter_object_t hb_filter_render_sub;
extern hb_filter_object_t hb_filter_vfr;
diff --git a/libhb/module.defs b/libhb/module.defs
index d3d409513..d679c68f2 100644
--- a/libhb/module.defs
+++ b/libhb/module.defs
@@ -40,10 +40,14 @@ endif
ifeq (1,$(FEATURE.opencl))
LIBHB.GCC.D += USE_OPENCL
endif
+ifeq (1,$(FEATURE.hwd))
+LIBHB.GCC.D += USE_HWD
+endif
LIBHB.GCC.D += __LIBHB__ USE_PTHREAD
LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include
+ifeq (1,$(FEATURE.opencl))
LIBHB.GCC.I += $(AMDAPPSDKROOT)/include
-
+endif
ifeq ($(BUILD.system),cygwin)
LIBHB.GCC.D += SYS_CYGWIN
else ifeq ($(BUILD.system),darwin)
diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c
index 8d49563bf..ee0f7661c 100644
--- a/libhb/oclnv12toyuv.c
+++ b/libhb/oclnv12toyuv.c
@@ -11,6 +11,7 @@
*/
#ifdef USE_OPENCL
+#ifdef USE_HWD
#include "vadxva2.h"
#include "oclnv12toyuv.h"
@@ -220,3 +221,4 @@ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxv
return 0;
}
#endif
+#endif
diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h
index 3307b8efe..5098d805e 100644
--- a/libhb/oclnv12toyuv.h
+++ b/libhb/oclnv12toyuv.h
@@ -22,8 +22,8 @@
* nv12 to yuv interface
* bufi is input frame of nv12, w is input frame width, h is input frame height
*/
-//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 );
+#ifdef USE_HWD
int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 );
-
+#endif
#endif
#endif
diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h
index 0ab3014ab..8b95ff234 100644
--- a/libhb/openclkernels.h
+++ b/libhb/openclkernels.h
@@ -119,4 +119,378 @@ char *kernel_src_nvtoyuv = KERNEL(
}
);
+char *kernel_src_hscaleall = KERNEL(
+
+ kernel void hscale_all_opencl(
+ global short *dst,
+ const global unsigned char *src,
+ const global short *yfilter,
+ const global int *yfilterPos,
+ int yfilterSize,
+ const global short *cfilter,
+ const global int *cfilterPos,
+ int cfilterSize,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+
+ int srcPos1 = h * srcStride + yfilterPos[w];
+ int srcPos2 = h * srcStride + yfilterPos[w + chrWidth];
+ int srcPos3 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w];
+ int srcPos4 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w + chrWidth];
+ int srcc1Pos = srcStride * srcHeight + (h) * (srcChrStride) + cfilterPos[w];
+ int srcc2Pos = srcc1Pos + ((srcChrStride)*(chrHeight));
+
+ int val1 = 0;
+ int val2 = 0;
+ int val3 = 0;
+ int val4 = 0;
+ int val5 = 0;
+ int val6 = 0;
+
+ int filterPos1 = yfilterSize * w;
+ int filterPos2 = yfilterSize * (w + chrWidth);
+ int cfilterPos1 = cfilterSize * w;
+
+ int j;
+ for (j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1 + j] * yfilter[filterPos1+ j];
+ val2 += src[srcPos2 + j] * yfilter[filterPos2 + j];
+ val3 += src[srcPos3 + j] * yfilter[filterPos1 + j];
+ val4 += src[srcPos4 + j] * yfilter[filterPos2 + j];
+ val5 += src[srcc1Pos+j] * cfilter[cfilterPos1 + j];
+ val6 += src[srcc2Pos+j] * cfilter[cfilterPos1 + j];
+ }
+ int dstPos1 = h *dstStride;
+ int dstPos2 = (h + chrHeight) * dstStride;
+
+ dst[dstPos1 + w] = ((val1 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val1 >> 7));
+ dst[dstPos1 + w + chrWidth] = ((val2 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val2 >> 7));
+ dst[dstPos2 + w] = ((val3 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val3 >> 7));
+ dst[dstPos2 + w + chrWidth] = ((val4 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val4 >> 7));
+
+ int dstPos3 = h * (dstChrStride) + w + dstStride*dstHeight;
+ int dstPos4 = h * (dstChrStride) + w + dstStride*dstHeight + ((dstChrStride)*chrHeight);
+ dst[dstPos3] = ((val5 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val5 >> 7));
+ dst[dstPos4] = ((val6 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val6 >> 7));
+ }
+ );
+
+char *kernel_src_hscalefast = KERNEL(
+
+ kernel void hscale_fast_opencl(
+ global short *dst,
+ const global unsigned char *src,
+ int xInc,
+ int chrXInc,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ int xpos1 = 0;
+ int xpos2 = 0;
+ int xx = xpos1 >> 16;
+ int xalpha = (xpos1 & 0xFFFF) >> 9;
+ dst[h * dstStride + w] = (src[h * srcStride + xx] << 7) + (src[h * srcStride + xx + 1] -src[h * srcStride + xx]) * xalpha;
+ int lowpart = h + (chrHeight);
+ dst[lowpart * dstStride + w] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;
+
+ int inv_i = w * xInc >> 16;
+ if( inv_i >= srcWidth - 1)
+ {
+ dst[h*dstStride + w] = src[h*srcStride + srcWidth-1]*128;
+ dst[lowpart*dstStride + w] = src[lowpart*srcStride + srcWidth - 1] * 128;
+ }
+
+ int rightpart = w + (chrWidth);
+ xx = xpos2 >> 16;
+ xalpha = (xpos2 & 0xFFFF) >> 9;
+ dst[h * dstStride + rightpart] = (src[h *srcStride + xx] << 7) + (src[h * srcStride + xx + 1] - src[h * srcStride + xx]) * xalpha;
+ dst[lowpart * dstStride + rightpart] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;
+ inv_i = rightpart * xInc >> 16;
+ if( inv_i >= srcWidth - 1)
+ {
+ dst[h*dstStride + rightpart] = src[h*srcStride + srcWidth - 1] * 128;
+ dst[lowpart*dstStride + rightpart] = src[lowpart * srcStride + srcWidth - 1] * 128;
+ }
+
+ int xpos = 0;
+ xpos = chrXInc * w;
+ xx = xpos >> 16;
+ xalpha = (xpos & 0xFFFF) >> 9;
+ src += srcStride * srcHeight;
+ dst += dstStride * dstHeight;
+ dst[h*(dstChrStride) + w] = (src[h * (srcChrStride) + xx] *(xalpha^127) + src[h * (srcChrStride) + xx + 1] * xalpha);
+ inv_i = w * xInc >> 16;
+ if( inv_i >= (srcWidth >> 1) - 1)
+ {
+ dst[h*(dstChrStride) + w] = src[h*(srcChrStride) + (srcWidth >> 1) -1]*128;
+ }
+
+ xpos = chrXInc * (w);
+ xx = xpos >> 16;
+ src += srcChrStride * srcHeight >> 1;
+ dst += (dstChrStride * chrHeight);
+ dst[h*(dstChrStride) + w] = (src[h * (srcChrStride) + xx]*(xalpha^127) + src[h * (srcChrStride) + xx + 1 ] * xalpha);
+
+ if( inv_i >= (srcWidth >> 1) - 1)
+ {
+ //v channel:
+ dst[h*(dstChrStride) + w] = src[h*(srcChrStride)+ (srcWidth >> 1) -1]*128;
+ }
+
+
+ }
+ );
+
+char *kernel_src_vscalealldither = KERNEL(
+
+ kernel void vscale_all_dither_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global short *yfilter,
+ int yfilterSize,
+ const global short *cfilter,
+ int cfilterSize,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ const unsigned char hb_dither_8x8_128[8][8] = {
+ { 36, 68, 60, 92, 34, 66, 58, 90, },
+ { 100, 4, 124, 28, 98, 2, 122, 26, },
+ { 52, 84, 44, 76, 50, 82, 42, 74, },
+ { 116, 20, 108, 12, 114, 18, 106, 10, },
+ { 32, 64, 56, 88, 38, 70, 62, 94, },
+ { 96, 0, 120, 24, 102, 6, 126, 30, },
+ { 48, 80, 40, 72, 54, 86, 46, 78, },
+ { 112, 16, 104, 8, 118, 22, 110, 14, },
+ };
+
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_dither_8x8_128[h & 7];
+ local_down_dither = hb_dither_8x8_128[(h + chrHeight) & 7];
+
+ //yscale;
+ int srcPos1 = (yfilterPos[h]) * srcStride + w;
+ int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);
+ int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;
+ int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;
+ int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);
+ int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;
+
+ int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;
+ int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);
+ int val3 = (local_down_dither[w &7] << 12);
+ int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);
+ int val5 = (local_up_dither[w & 7] << 12);
+ int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chrome channel.
+
+ int j;
+ int filterPos1 = h * yfilterSize;
+ int filterPos2 = ( h + chrHeight ) * yfilterSize;
+ for(j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1] * yfilter[filterPos1 + j];
+ srcPos1 += srcStride;
+ val2 += src[srcPos2] * yfilter[filterPos1 + j];
+ srcPos2 += srcStride;
+ val3 += src[srcPos3] * yfilter[filterPos2 + j];
+ srcPos3 += srcStride;
+ val4 += src[srcPos4] * yfilter[filterPos2 + j];
+ srcPos4 += srcStride;
+ val5 += src[src1Pos] * cfilter[filterPos1 + j];
+ val6 += src[src2Pos] * cfilter[filterPos1 + j];
+ src1Pos += dstChrStride;
+ src2Pos += dstChrStride;
+ }
+ dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));
+ dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));
+ dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));
+ dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));
+
+ int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);
+ int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;
+ dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));
+ dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));
+ }
+ );
+
+char *kernel_src_vscaleallnodither = KERNEL(
+
+ kernel void vscale_all_nodither_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global short *yfilter,
+ int yfilterSize,
+ const global short *cfilter,
+ int cfilterSize,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ const unsigned char hb_sws_pb_64[8] = {
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_sws_pb_64;
+ local_down_dither = hb_sws_pb_64;
+
+
+ //yscale;
+ int srcPos1 = (yfilterPos[h]) * srcStride + w;
+ int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);
+ int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;
+ int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;
+ int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);
+ int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;
+
+ int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;
+ int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);
+ int val3 = (local_down_dither[w &7] << 12);
+ int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);
+ int val5 = (local_up_dither[w & 7] << 12);
+ int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chrome channel.
+
+
+ int j;
+ int filterPos1 = h * yfilterSize;
+ int filterPos2 = ( h + chrHeight ) * yfilterSize;
+ for(j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1] * yfilter[filterPos1 + j];
+ srcPos1 += srcStride;
+ val2 += src[srcPos2] * yfilter[filterPos1 + j];
+ srcPos2 += srcStride;
+ val3 += src[srcPos3] * yfilter[filterPos2 + j];
+ srcPos3 += srcStride;
+ val4 += src[srcPos4] * yfilter[filterPos2 + j];
+ srcPos4 += srcStride;
+ val5 += src[src1Pos] * cfilter[filterPos1 + j];
+ val6 += src[src2Pos] * cfilter[filterPos1 + j];
+ src1Pos += dstChrStride;
+ src2Pos += dstChrStride;
+ }
+ dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));
+ dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));
+ dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));
+ dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));;
+
+ int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);
+ int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;
+ dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));
+ dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));
+ }
+ );
+
+char *kernel_src_vscalefast = KERNEL(
+
+ kernel void vscale_fast_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ const unsigned char hb_sws_pb_64[8] = {
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_sws_pb_64;
+ local_down_dither = hb_sws_pb_64;
+
+
+ int rightpart = w + chrWidth;
+ int bh = h + chrHeight; // bottom part
+ short val1 = (src[(yfilterPos[h]) * dstStride + w] + local_up_dither[(w + 0) & 7]) >> 7; //lum offset is 0;
+ short val2 = (src[(yfilterPos[h]) * dstStride + rightpart] + local_up_dither[rightpart & 7]) >> 7;
+ short val3 = (src[(yfilterPos[bh]) * dstStride + w] + local_down_dither[w & 7]) >> 7;
+ short val4 = (src[(yfilterPos[bh]) * dstStride + rightpart] + local_down_dither[rightpart & 7]) >> 7;
+ dst[h * dstStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+ dst[h * dstStride + rightpart] = ((val2&(~0xFF)) ? ((-val2) >> 31) : (val2));
+ dst[bh * dstStride + w] = ((val3&(~0xFF)) ? ((-val3) >> 31) : (val3));
+ dst[bh * dstStride + rightpart] = ((val4&(~0xFF)) ? ((-val4) >> 31) : (val4));
+
+ src += dstStride * srcHeight;
+ dst += dstStride * dstHeight;
+ val1 = (src[cfilterPos[h] * (dstChrStride) + w] + local_up_dither[ w & 7]) >> 7;
+ dst[h * (dstChrStride) + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+
+ src += dstChrStride * (srcHeight >> 1);
+ dst += dstChrStride * chrHeight;
+ val1 = (src[cfilterPos[h] * dstChrStride + w] + local_up_dither[ (w + 3) & 7] ) >> 7;
+ dst[h * dstChrStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+
+ }
+ );
+
#endif
diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c
index a10e9d64d..2371c2167 100644
--- a/libhb/openclwrapper.c
+++ b/libhb/openclwrapper.c
@@ -99,6 +99,7 @@ int hb_regist_opencl_kernel()
ADD_KERNEL_CFG( 0, "frame_h_scale", NULL )
ADD_KERNEL_CFG( 1, "frame_v_scale", NULL )
ADD_KERNEL_CFG( 2, "nv12toyuv", NULL )
+ ADD_KERNEL_CFG( 3, "scale_opencl", NULL )
return 0;
}
@@ -331,7 +332,7 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] ))
{
- hb_log( "Notice: Unable to write opencl kernel, writing to tempory directory instead." );
+ hb_log( "Notice: Unable to write opencl kernel, writing to temporary directory instead." );
//printf( "opencl-wrapper: write binary[%s] failds\n", fileName);
return 0;
} //else
@@ -450,8 +451,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
if( status != CL_SUCCESS )
{
- hb_log( "Notice: No more platform vendor info.\n" );
- return(1);
+ continue;
}
gpu_info->platform = platforms[i];
@@ -470,8 +470,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
if( status != CL_SUCCESS )
{
- hb_log( "Notice: No available GPU device.\n" );
- return(1);
+ continue;
}
if( numDevices )
@@ -482,6 +481,12 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
}
if( NULL == gpu_info->platform )
{
+ hb_log( "Notice: No OpenCL-compatible GPU found.\n" );
+ return(1);
+ }
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "Notice: No OpenCL-compatible GPU found.\n" );
return(1);
}
@@ -659,14 +664,18 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
if( status == 0 )
return(0);
#else
- int kernel_src_size = strlen( kernel_src_hscale )+strlen( kernel_src_vscale )+strlen( kernel_src_nvtoyuv );
- source_str = (char*)malloc( kernel_src_size+2 );
+ int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast );
+ source_str = (char*)malloc( kernel_src_size + 2 );
strcpy( source_str, kernel_src_hscale );
strcat( source_str, kernel_src_vscale );
strcat( source_str, kernel_src_nvtoyuv );
+ strcat( source_str, kernel_src_hscaleall );
+ strcat( source_str, kernel_src_hscalefast );
+ strcat( source_str, kernel_src_vscalealldither );
+ strcat( source_str, kernel_src_vscaleallnodither );
+ strcat( source_str, kernel_src_vscalefast );
#endif
-
source = source_str;
source_size[0] = strlen( source );
@@ -678,7 +687,8 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
sizeof(numDevices),
&numDevices,
NULL );
- if( status != CL_SUCCESS ){
+ if( status != CL_SUCCESS )
+ {
hb_log( "Notice: Unable to get the number of devices in context.\n" );
return 0;
}
@@ -730,7 +740,6 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
gpu_info->programs[idx] = clCreateProgramWithSource(
gpu_info->context, 1, &source, source_size, &status );
}
-
if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
hb_log( "Notice: Unable to get list of devices in context.\n" );
return(0);
@@ -931,4 +940,31 @@ int hb_get_opencl_env()
}
return status;
}
+
+
+int hb_create_buffer(cl_mem *cl_Buf,int flags,int size)
+{
+ int status;
+ *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );
+
+ if( status != CL_SUCCESS )
+ {
+ printf("clCreateBuffer error '%d'\n\n",status);
+ return 0;
+ }
+ return 1;
+}
+
+int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size)
+{
+ int status;
+
+ status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0);
+ if( status != CL_SUCCESS )
+ {
+ printf("av_read_opencl_buffer error '%d'\n",status);
+ return 0;
+ }
+ return 1;
+}
#endif
diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h
index 933e7a3b3..c4d96f1e9 100644
--- a/libhb/openclwrapper.h
+++ b/libhb/openclwrapper.h
@@ -13,6 +13,7 @@
*/
#ifndef __OPENCL_WRAPPER_H
#define __OPENCL_WRAPPER_H
+#ifdef USE_OPENCL
#include "common.h"
//support AMD opencl
@@ -21,24 +22,20 @@
typedef struct _KernelEnv
{
-#ifdef USE_OPENCL
cl_context context;
cl_command_queue command_queue;
cl_program program;
cl_kernel kernel;
-#endif
char kernel_name[150];
int isAMD;
}KernelEnv;
typedef struct _OpenCLEnv
{
-#ifdef USE_OPENCL
cl_platform_id platform;
cl_context context;
cl_device_id devices;
cl_command_queue command_queue;
-#endif
}OpenCLEnv;
@@ -76,4 +73,10 @@ int hb_create_kernel( char * kernelname, KernelEnv * env );
int hb_release_kernel( KernelEnv * env );
int hb_get_opencl_env();
+
+int hb_create_buffer(cl_mem *cl_Buf,int flags,int size);
+
+int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size);
+
+#endif
#endif
diff --git a/libhb/scale.c b/libhb/scale.c
new file mode 100644
index 000000000..27adcba32
--- /dev/null
+++ b/libhb/scale.c
@@ -0,0 +1,1199 @@
+#ifdef USE_OPENCL
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hb.h"
+#include "scale.h"
+#include "scale_kernel.h"
+#include "libavutil/pixdesc.h"
+
+#define isScaleRGBinInt(x) \
+ ( \
+ (x)==AV_PIX_FMT_RGB48BE || \
+ (x)==AV_PIX_FMT_RGB48LE || \
+ (x)==AV_PIX_FMT_RGB32 || \
+ (x)==AV_PIX_FMT_RGB32_1 || \
+ (x)==AV_PIX_FMT_RGB24 || \
+ (x)==AV_PIX_FMT_RGB565BE || \
+ (x)==AV_PIX_FMT_RGB565LE || \
+ (x)==AV_PIX_FMT_RGB555BE || \
+ (x)==AV_PIX_FMT_RGB555LE || \
+ (x)==AV_PIX_FMT_RGB444BE || \
+ (x)==AV_PIX_FMT_RGB444LE || \
+ (x)==AV_PIX_FMT_RGB8 || \
+ (x)==AV_PIX_FMT_RGB4 || \
+ (x)==AV_PIX_FMT_RGB4_BYTE || \
+ (x)==AV_PIX_FMT_MONOBLACK || \
+ (x)==AV_PIX_FMT_MONOWHITE \
+ )
+#define isScaleBGRinInt(x) \
+ ( \
+ (x)==AV_PIX_FMT_BGR48BE || \
+ (x)==AV_PIX_FMT_BGR48LE || \
+ (x)==AV_PIX_FMT_BGR32 || \
+ (x)==AV_PIX_FMT_BGR32_1 || \
+ (x)==AV_PIX_FMT_BGR24 || \
+ (x)==AV_PIX_FMT_BGR565BE || \
+ (x)==AV_PIX_FMT_BGR565LE || \
+ (x)==AV_PIX_FMT_BGR555BE || \
+ (x)==AV_PIX_FMT_BGR555LE || \
+ (x)==AV_PIX_FMT_BGR444BE || \
+ (x)==AV_PIX_FMT_BGR444LE || \
+ (x)==AV_PIX_FMT_BGR8 || \
+ (x)==AV_PIX_FMT_BGR4 || \
+ (x)==AV_PIX_FMT_BGR4_BYTE|| \
+ (x)==AV_PIX_FMT_MONOBLACK|| \
+ (x)==AV_PIX_FMT_MONOWHITE \
+ )
+
+#define isScaleAnyRGB(x) \
+ ( \
+ isScaleRGBinInt(x) || \
+ isScaleBGRinInt(x) \
+ )
+
+#define isScaleGray(x) \
+ ((x) == AV_PIX_FMT_GRAY8 || \
+ (x) == AV_PIX_FMT_Y400A || \
+ (x) == AV_PIX_FMT_GRAY16BE || \
+ (x) == AV_PIX_FMT_GRAY16LE)
+
+static ScaleContext *g_scale;
+
+static double getScaleSplineCoeff(double a, double b, double c, double d,
+ double dist)
+{
+ if (dist <= 1.0)
+ return ((d * dist + c) * dist + b) * dist + a;
+ else
+ return getScaleSplineCoeff(0.0,
+ b + 2.0 * c + 3.0 * d,
+ c + 3.0 * d,
+ -b - 3.0 * c - 6.0 * d,
+ dist - 1.0);
+}
+
+static int initScaleFilter(int16_t **outFilter, int32_t **filterPos,
+ int *outFilterSize, int xInc, int srcW, int dstW,
+ int filterAlign, int one, int flags, int cpu_flags,
+ ScaleVector *srcFilter, ScaleVector *dstFilter,
+ double param[2])
+{
+ int i;
+ int filterSize;
+ int filter2Size;
+ int minFilterSize;
+ int64_t *filter = NULL;
+ int64_t *filter2 = NULL;
+ const int64_t fone = 1LL << 54;
+ int ret = -1;
+
+ *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos));
+ if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) {
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ if (FFABS(xInc - 0x10000) < 10) { // unscaled
+ int i;
+ filterSize = 1;
+ // FF_ALLOCZ_OR_GOTO(NULL, filter,dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize);
+ if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) {
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+
+ for (i = 0; i < dstW; i++) {
+ filter[i * filterSize] = fone;
+ (*filterPos)[i] = i;
+ }
+ } else if (flags & SWS_POINT) { // lame looking point sampling mode
+ int i;
+ int64_t xDstInSrc;
+ filterSize = 1;
+ //FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc / 2 - 0x8000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
+
+ (*filterPos)[i] = xx;
+ filter[i] = fone;
+ xDstInSrc += xInc;
+ }
+ } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
+ (flags & SWS_FAST_BILINEAR)) { // bilinear upscale
+ int i;
+ int64_t xDstInSrc;
+ filterSize = 2;
+ //FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc / 2 - 0x8000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
+ int j;
+
+ (*filterPos)[i] = xx;
+ // bilinear upscale / linear interpolate / area averaging
+ for (j = 0; j < filterSize; j++) {
+ int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
+ if (coeff < 0)
+ coeff = 0;
+ filter[i * filterSize + j] = coeff;
+ xx++;
+ }
+ xDstInSrc += xInc;
+ }
+ } else {
+ int64_t xDstInSrc;
+ int sizeFactor;
+
+ if (flags & SWS_BICUBIC)
+ sizeFactor = 4;
+ else if (flags & SWS_X)
+ sizeFactor = 8;
+ else if (flags & SWS_AREA)
+ sizeFactor = 1; // downscale only, for upscale it is bilinear
+ else if (flags & SWS_GAUSS)
+ sizeFactor = 8; // infinite ;)
+ else if (flags & SWS_LANCZOS)
+ sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
+ else if (flags & SWS_SINC)
+ sizeFactor = 20; // infinite ;)
+ else if (flags & SWS_SPLINE)
+ sizeFactor = 20; // infinite ;)
+ else if (flags & SWS_BILINEAR)
+ sizeFactor = 2;
+ else {
+ sizeFactor = 0; // GCC warning killer
+ assert(0);
+ }
+
+ if (xInc <= 1 << 16)
+ filterSize = 1 + sizeFactor; // upscale
+ else
+ filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
+
+
+ filterSize = FFMIN(filterSize, srcW - 2);
+ filterSize = FFMAX(filterSize, 1);
+
+ // FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc - 0x10000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
+ int j;
+ (*filterPos)[i] = xx;
+ for (j = 0; j < filterSize; j++) {
+ int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
+ double floatd;
+ int64_t coeff;
+
+ if (xInc > 1 << 16)
+ d = d * dstW / srcW;
+ floatd = d * (1.0 / (1 << 30));
+
+ if (flags & SWS_BICUBIC) {
+ int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
+ int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
+
+ if (d >= 1LL << 31) {
+ coeff = 0.0;
+ } else {
+ int64_t dd = (d * d) >> 30;
+ int64_t ddd = (dd * d) >> 30;
+
+ if (d < 1LL << 30)
+ coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
+ (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
+ (6 * (1 << 24) - 2 * B) * (1 << 30);
+ else
+ coeff = (-B - 6 * C) * ddd +
+ (6 * B + 30 * C) * dd +
+ (-12 * B - 48 * C) * d +
+ (8 * B + 24 * C) * (1 << 30);
+ }
+ coeff *= fone >> (30 + 24);
+ }
+#if 0
+ else if (flags & SWS_X) {
+ double p = param ? param * 0.01 : 0.3;
+ coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
+ coeff *= pow(2.0, -p * d * d);
+ }
+#endif
+ else if (flags & SWS_X) {
+ double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
+ double c;
+
+ if (floatd < 1.0)
+ c = cos(floatd * M_PI);
+ else
+ c = -1.0;
+ if (c < 0.0)
+ c = -pow(-c, A);
+ else
+ c = pow(c, A);
+ coeff = (c * 0.5 + 0.5) * fone;
+ } else if (flags & SWS_AREA) {
+ int64_t d2 = d - (1 << 29);
+ if (d2 * xInc < -(1LL << (29 + 16)))
+ coeff = 1.0 * (1LL << (30 + 16));
+ else if (d2 * xInc < (1LL << (29 + 16)))
+ coeff = -d2 * xInc + (1LL << (29 + 16));
+ else
+ coeff = 0.0;
+ coeff *= fone >> (30 + 16);
+ } else if (flags & SWS_GAUSS) {
+ double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+ coeff = (pow(2.0, -p * floatd * floatd)) * fone;
+ } else if (flags & SWS_SINC) {
+ coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
+ } else if (flags & SWS_LANCZOS) {
+ double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+ coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
+ (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
+ if (floatd > p)
+ coeff = 0;
+ } else if (flags & SWS_BILINEAR) {
+ coeff = (1 << 30) - d;
+ if (coeff < 0)
+ coeff = 0;
+ coeff *= fone >> 30;
+ } else if (flags & SWS_SPLINE) {
+ double p = -2.196152422706632;
+ coeff = getScaleSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
+ } else {
+ coeff = 0.0; // GCC warning killer
+ assert(0);
+ }
+
+ filter[i * filterSize + j] = coeff;
+ xx++;
+ }
+ xDstInSrc += 2 * xInc;
+ }
+ }
+
+ assert(filterSize > 0);
+ filter2Size = filterSize;
+ if (srcFilter)
+ filter2Size += srcFilter->length - 1;
+ if (dstFilter)
+ filter2Size += dstFilter->length - 1;
+ assert(filter2Size > 0);
+ //FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
+ filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2));
+ if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0)
+ {
+ printf("Can't alloc memory\n");
+ goto fail;
+ }
+
+ for (i = 0; i < dstW; i++) {
+ int j, k;
+
+ if (srcFilter) {
+ for (k = 0; k < srcFilter->length; k++) {
+ for (j = 0; j < filterSize; j++)
+ filter2[i * filter2Size + k + j] +=
+ srcFilter->coeff[k] * filter[i * filterSize + j];
+ }
+ } else {
+ for (j = 0; j < filterSize; j++)
+ filter2[i * filter2Size + j] = filter[i * filterSize + j];
+ }
+ // FIXME dstFilter
+
+ (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
+ }
+ av_freep(&filter);
+
+ // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
+ minFilterSize = 0;
+ for (i = dstW - 1; i >= 0; i--) {
+ int min = filter2Size;
+ int j;
+ int64_t cutOff = 0.0;
+
+ for (j = 0; j < filter2Size; j++) {
+ int k;
+ cutOff += FFABS(filter2[i * filter2Size]);
+
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
+ break;
+
+ if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
+ break;
+
+ // move filter coefficients left
+ for (k = 1; k < filter2Size; k++)
+ filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
+ filter2[i * filter2Size + k - 1] = 0;
+ (*filterPos)[i]++;
+ }
+
+ cutOff = 0;
+ for (j = filter2Size - 1; j > 0; j--) {
+ cutOff += FFABS(filter2[i * filter2Size + j]);
+
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
+ break;
+ min--;
+ }
+
+ if (min > minFilterSize)
+ minFilterSize = min;
+ }
+
+
+ assert(minFilterSize > 0);
+ filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
+ assert(filterSize > 0);
+ filter = (int64_t *)av_malloc(filterSize * dstW * sizeof(*filter));
+ if (filterSize >= MAX_FILTER_SIZE * 16 /
+ ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
+ goto fail;
+ *outFilterSize = filterSize;
+
+ if (flags & SWS_PRINT_INFO)
+ //av_log(NULL, AV_LOG_VERBOSE,
+ // "SwScaler: reducing / aligning filtersize %d -> %d\n",
+ // filter2Size, filterSize);
+ printf("SwScaler: reducing / aligning filtersize %d -> %d\n",filter2Size,filterSize);
+ for (i = 0; i < dstW; i++) {
+ int j;
+
+ for (j = 0; j < filterSize; j++) {
+ if (j >= filter2Size)
+ filter[i * filterSize + j] = 0;
+ else
+ filter[i * filterSize + j] = filter2[i * filter2Size + j];
+ if ((flags & SWS_BITEXACT) && j >= minFilterSize)
+ filter[i * filterSize + j] = 0;
+ }
+ }
+
+ // FIXME try to align filterPos if possible
+
+ // fix borders
+ for (i = 0; i < dstW; i++) {
+ int j;
+ if ((*filterPos)[i] < 0) {
+ // move filter coefficients left to compensate for filterPos
+ for (j = 1; j < filterSize; j++) {
+ int left = FFMAX(j + (*filterPos)[i], 0);
+ filter[i * filterSize + left] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
+ }
+ (*filterPos)[i]= 0;
+ }
+
+ if ((*filterPos)[i] + filterSize > srcW) {
+ int shift = (*filterPos)[i] + filterSize - srcW;
+ // move filter coefficients right to compensate for filterPos
+ for (j = filterSize - 2; j >= 0; j--) {
+ int right = FFMIN(j + shift, filterSize - 1);
+ filter[i * filterSize + right] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
+ }
+ (*filterPos)[i]= srcW - filterSize;
+ }
+ }
+
+ // Note the +1 is for the MMX scaler which reads over the end
+ // FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
+ // *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
+ *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t));
+ if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0)
+ {
+ printf("Can't alloc memory\n");
+ goto fail;
+ }
+
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int64_t error = 0;
+ int64_t sum = 0;
+
+ for (j = 0; j < filterSize; j++) {
+ sum += filter[i * filterSize + j];
+ }
+ sum = (sum + one / 2) / one;
+ for (j = 0; j < *outFilterSize; j++) {
+ int64_t v = filter[i * filterSize + j] + error;
+ int intV = ROUNDED_DIV(v, sum);
+ (*outFilter)[i * (*outFilterSize) + j] = intV;
+ error = v - intV * sum;
+ }
+ }
+
+ (*filterPos)[dstW + 0] =
+ (*filterPos)[dstW + 1] =
+ (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
+ for (i = 0; i < *outFilterSize; i++) {
+ int k = (dstW - 1) * (*outFilterSize) + i;
+ (*outFilter)[k + 1 * (*outFilterSize)] =
+ (*outFilter)[k + 2 * (*outFilterSize)] =
+ (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
+ }
+
+ ret = 0;
+
+fail:
+ av_free(filter);
+ av_free(filter2);
+ return ret;
+}
+
+static int handle_scale_jpeg(enum PixelFormat *format)
+{
+ switch (*format) {
+ case AV_PIX_FMT_YUVJ420P:
+ *format = AV_PIX_FMT_YUV420P;
+ return 1;
+ case AV_PIX_FMT_YUVJ422P:
+ *format = AV_PIX_FMT_YUV422P;
+ return 1;
+ case AV_PIX_FMT_YUVJ444P:
+ *format = AV_PIX_FMT_YUV444P;
+ return 1;
+ case AV_PIX_FMT_YUVJ440P:
+ *format = AV_PIX_FMT_YUV440P;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void scaleGetSubSampleFactors(int *h, int *v, enum PixelFormat format)
+{
+ *h = av_pix_fmt_descriptors[format].log2_chroma_w;
+ *v = av_pix_fmt_descriptors[format].log2_chroma_h;
+}
+
+typedef struct FormatEntry {
+ int is_supported_in, is_supported_out;
+} FormatEntry;
+
+static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 0 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 0, 0 },
+ { 1, 1 },
+ { 0, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 0, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 0 },
+ { 1, 0 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 0 },
+ { 1, 1 },
+ { 1, 1 },
+ { 0, 0 },
+ { 0, 0 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 1 },
+ { 1, 0 },
+ { 1, 0 },
+ { 1, 0 },
+ { 1, 0 },
+ { 1, 0 },
+ { 1, 0 },
+ { 1, 0 },
+};
+
+int scale_isSupportedInput(enum PixelFormat pix_fmt)
+{
+ return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
+ format_entries[pix_fmt].is_supported_in : 0;
+}
+
+int scale_isSupportedOutput(enum PixelFormat pix_fmt)
+{
+ return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
+ format_entries[pix_fmt].is_supported_out : 0;
+}
+
+static void hcscale_fast_c(ScaleContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
+ dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
+}
+
+static void hyscale_fast_c(ScaleContext *c, int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
+}
+
+static void hScale16To19_c(ScaleContext *c, int16_t *_dst, int dstW,
+ const uint8_t *_src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ const uint16_t *src = (const uint16_t *) _src;
+ int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ int sh = bits - 4;
+
+ if((isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
+ sh= 9;
+
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+
+ for (j = 0; j < filterSize; j++) {
+ val += src[srcPos + j] * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
+ }
+}
+
+static void hScale16To15_c(ScaleContext *c, int16_t *dst, int dstW,
+ const uint8_t *_src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ const uint16_t *src = (const uint16_t *) _src;
+ int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
+ if(sh<15)
+ sh= isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+
+ for (j = 0; j < filterSize; j++) {
+ val += src[srcPos + j] * filter[filterSize * i + j];
+ }
+ // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
+ dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
+ }
+}
+
+static void hScale8To15_c(ScaleContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+ for (j = 0; j < filterSize; j++) {
+ val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
+ }
+}
+
+static void hScale8To19_c(ScaleContext *c, int16_t *_dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+ for (j = 0; j < filterSize; j++) {
+ val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
+ }
+}
+
+static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
+{
+ int i;
+ for (i = 0; i < width; i++) {
+ dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264
+ dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264
+ }
+}
+
+static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
+{
+ int i;
+ for (i = 0; i < width; i++) {
+ dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469
+ dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469
+ }
+}
+
+static void lumRangeToJpeg_c(int16_t *dst, int width)
+{
+ int i;
+ for (i = 0; i < width; i++)
+ dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
+}
+
+static void lumRangeFromJpeg_c(int16_t *dst, int width)
+{
+ int i;
+ for (i = 0; i < width; i++)
+ dst[i] = (dst[i] * 14071 + 33561947) >> 14;
+}
+
+static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
+{
+ int i;
+ int32_t *dstU = (int32_t *) _dstU;
+ int32_t *dstV = (int32_t *) _dstV;
+ for (i = 0; i < width; i++) {
+ dstU[i] = (FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
+ dstV[i] = (FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264
+ }
+}
+
+static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
+{
+ int i;
+ int32_t *dstU = (int32_t *) _dstU;
+ int32_t *dstV = (int32_t *) _dstV;
+ for (i = 0; i < width; i++) {
+ dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469
+ dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469
+ }
+}
+
+static void lumRangeToJpeg16_c(int16_t *_dst, int width)
+{
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < width; i++)
+ dst[i] = (FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
+}
+
+static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
+{
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < width; i++)
+ dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
+}
+
+static av_cold void sws_init_swScale_c(ScaleContext *c)
+{
+ enum PixelFormat srcFormat = c->srcFormat;
+
+ ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
+ &c->yuv2nv12cX, &c->yuv2packed1,
+ &c->yuv2packed2, &c->yuv2packedX);
+
+ ff_sws_init_input_funcs(c);
+
+ if (c->srcBpc == 8) {
+ if (c->dstBpc <= 10) {
+ c->hyScale = c->hcScale = hScale8To15_c;
+ if (c->flags & SWS_FAST_BILINEAR) {
+ c->hyscale_fast = hyscale_fast_c;
+ c->hcscale_fast = hcscale_fast_c;
+ }
+ } else {
+ c->hyScale = c->hcScale = hScale8To19_c;
+ }
+ } else {
+ c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
+ : hScale16To15_c;
+ }
+
+ if (c->srcRange != c->dstRange && !isScaleAnyRGB(c->dstFormat)) {
+ if (c->dstBpc <= 10) {
+ if (c->srcRange) {
+ c->lumConvertRange = lumRangeFromJpeg_c;
+ c->chrConvertRange = chrRangeFromJpeg_c;
+ } else {
+ c->lumConvertRange = lumRangeToJpeg_c;
+ c->chrConvertRange = chrRangeToJpeg_c;
+ }
+ } else {
+ if (c->srcRange) {
+ c->lumConvertRange = lumRangeFromJpeg16_c;
+ c->chrConvertRange = chrRangeFromJpeg16_c;
+ } else {
+ c->lumConvertRange = lumRangeToJpeg16_c;
+ c->chrConvertRange = chrRangeToJpeg16_c;
+ }
+ }
+ }
+
+ if (!(isScaleGray(srcFormat) || isScaleGray(c->dstFormat) ||
+ srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
+ c->needs_hcscale = 1;
+}
+
+int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dstFilter)
+{
+// int i, j;
+ ScaleFilter dummyFilter = { NULL, NULL, NULL, NULL };
+ int srcW = c->srcW;
+ int srcH = c->srcH;
+ int dstW = c->dstW;
+ int dstH = c->dstH;
+// int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
+ int flags, cpu_flags;
+ enum PixelFormat srcFormat = c->srcFormat;
+ enum PixelFormat dstFormat = c->dstFormat;
+
+ cpu_flags = 0;
+ flags = c->flags;
+
+ if(srcFormat != c->srcFormat || dstFormat != c->dstFormat){
+ printf("deprecated pixel format used, make sure you did set range correctly\n");
+ c->srcFormat = srcFormat;
+ c->dstFormat = dstFormat;
+ }
+
+/*
+ if (!scale_isSupportedInput(srcFormat)) {
+ printf("%s is not supported as input pixel format\n",av_get_pix_fmt_name(srcFormat));
+ return -1;
+ }
+
+ if (!scale_isSupportedOutput(dstFormat)) {
+ printf("%s is not supported as output pixel format\n",av_get_pix_fmt_name(dstFormat));
+ return -1;
+ }
+ i = flags & (SWS_POINT |
+ SWS_AREA |
+ SWS_BILINEAR |
+ SWS_FAST_BILINEAR |
+ SWS_BICUBIC |
+ SWS_X |
+ SWS_GAUSS |
+ SWS_LANCZOS |
+ SWS_SINC |
+ SWS_SPLINE |
+ SWS_BICUBLIN);
+ if (!i || (i & (i - 1))) {
+ printf("Exactly one scaler algorithm must be chosen, got %X\n",i);
+ return -1;
+ }
+*/
+
+ if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {
+ printf("%dx%d -> %dx%d is invalid scaling dimension\n",srcW,srcH,dstW,dstH);
+ return -1;
+ }
+
+ if (!dstFilter)
+ dstFilter = &dummyFilter;
+ if (!srcFilter)
+ srcFilter = &dummyFilter;
+
+ c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
+ c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
+ c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[dstFormat]);
+ c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[srcFormat]);
+ c->vRounder = 4 * 0x0001000100010001ULL;
+
+ scaleGetSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
+ scaleGetSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
+
+/*
+ if (isScaleAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
+ if (dstW&1) {
+ printf("Forcing full internal H chroma due to odd output size\n");
+ flags |= SWS_FULL_CHR_H_INT;
+ c->flags = flags;
+ }
+ }
+
+ if (flags & SWS_FULL_CHR_H_INT &&
+ isScaleAnyRGB(dstFormat) &&
+ dstFormat != AV_PIX_FMT_RGBA &&
+ dstFormat != AV_PIX_FMT_ARGB &&
+ dstFormat != AV_PIX_FMT_BGRA &&
+ dstFormat != AV_PIX_FMT_ABGR &&
+ dstFormat != AV_PIX_FMT_RGB24 &&
+ dstFormat != AV_PIX_FMT_BGR24) {
+ printf("full chroma interpolation for destination format '%s' not yet implemented\n", av_get_pix_fmt_name(dstFormat));
+ flags &= ~SWS_FULL_CHR_H_INT;
+ c->flags = flags;
+ }
+
+ if (isScaleAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
+ c->chrDstHSubSample = 1;
+*/
+
+ // drop some chroma lines if the user wants it
+ c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> SWS_SRC_V_CHR_DROP_SHIFT;
+ c->chrSrcVSubSample += c->vChrDrop;
+
+/*
+ if (isScaleAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) &&
+ srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 &&
+ srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 &&
+ srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE &&
+ ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || (flags & SWS_FAST_BILINEAR)))
+ c->chrSrcHSubSample = 1;
+*/
+
+ c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample);
+ c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample);
+ c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
+ c->chrDstH = -((-dstH) >> c->chrDstVSubSample);
+
+/*
+ c->srcBpc = 1 + av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1;
+ if (c->srcBpc < 8)
+ c->srcBpc = 8;
+ c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1;
+ if (c->dstBpc < 8)
+ c->dstBpc = 8;
+ if (isScaleAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
+ c->srcBpc = 16;
+ if (c->dstBpc == 16)
+ dst_stride <<= 1;
+ c->formatConvBuffer = (uint8_t *)av_mallocz(FFALIGN(srcW*2+78, 16) * 2);
+ if( c->formatConvBuffer == NULL && (FFALIGN(srcW*2+78, 16) * 2) != 0)
+ {
+ printf("Can't alloc memory formatConvBuffer\n");
+ goto fail;
+ }
+*/
+
+ c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
+ c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
+
+/*
+ if (flags & SWS_FAST_BILINEAR) {
+ if (c->canMMX2BeUsed) {
+ c->lumXInc += 20;
+ c->chrXInc += 20;
+ }
+ }
+*/
+
+ const int filterAlign = 1;
+
+ if (initScaleFilter(&c->hLumFilter, &c->hLumFilterPos,
+ &c->hLumFilterSize, c->lumXInc,
+ srcW, dstW, filterAlign, 1 << 14,
+ (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
+ cpu_flags, srcFilter->lumH, dstFilter->lumH,
+ c->param) < 0)
+ goto fail;
+
+ if (initScaleFilter(&c->hChrFilter, &c->hChrFilterPos,
+ &c->hChrFilterSize, c->chrXInc,
+ c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
+ (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
+ cpu_flags, srcFilter->chrH, dstFilter->chrH,
+ c->param) < 0)
+ goto fail;
+
+ if (initScaleFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
+ c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
+ (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
+ cpu_flags, srcFilter->lumV, dstFilter->lumV,
+ c->param) < 0)
+ goto fail;
+
+ if (initScaleFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
+ c->chrYInc, c->chrSrcH, c->chrDstH,
+ filterAlign, (1 << 12),
+ (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
+ cpu_flags, srcFilter->chrV, dstFilter->chrV,
+ c->param) < 0)
+ goto fail;
+
+/*
+#if HAVE_ALTIVEC
+ FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail);
+ FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);
+
+ for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
+ int j;
+ short *p = (short *)&c->vYCoeffsBank[i];
+ for (j = 0; j < 8; j++)
+ p[j] = c->vLumFilter[i];
+ }
+
+ for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
+ int j;
+ short *p = (short *)&c->vCCoeffsBank[i];
+ for (j = 0; j < 8; j++)
+ p[j] = c->vChrFilter[i];
+ }
+#endif
+*/
+
+/*
+ // calculate buffer sizes so that they won't run out while handling these damn slices
+ c->vLumBufSize = c->vLumFilterSize;
+ c->vChrBufSize = c->vChrFilterSize;
+ for (i = 0; i < dstH; i++) {
+ int chrI = (int64_t)i * c->chrDstH / dstH;
+ int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
+ ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
+ << c->chrSrcVSubSample));
+
+ nextSlice >>= c->chrSrcVSubSample;
+ nextSlice <<= c->chrSrcVSubSample;
+ if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
+ c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
+ if (c->vChrFilterPos[chrI] + c->vChrBufSize <
+ (nextSlice >> c->chrSrcVSubSample))
+ c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
+ c->vChrFilterPos[chrI];
+ }
+
+ c->lumPixBuf = (int16_t **)av_malloc( c->vLumBufSize * 3 * sizeof(int16_t *));
+ if(c->lumPixBuf == NULL && ( c->vLumBufSize * 3 * sizeof(int16_t *)) != 0)
+ {
+ printf("Can't alloc memory lumPixbuf\n");
+ goto fail;
+ }
+
+ c->chrUPixBuf = (int16_t **)av_malloc( c->vChrBufSize * 3 * sizeof(int16_t *));
+ if(c->chrUPixBuf == NULL && ( c->vChrBufSize * 3 * sizeof(int16_t *)) != 0)
+ {
+ printf("Can't alloc memory chrUpixbuf\n");
+ goto fail;
+ }
+
+ c->chrVPixBuf = (int16_t **)av_malloc( c->vChrBufSize * 3 * sizeof(int16_t *));
+ if(c->chrVPixBuf == NULL && ( c->vChrBufSize * 3 * sizeof(int16_t *)) != 0)
+ {
+ printf("Can't alloc memory chrVPixBuf\n");
+ goto fail;
+ }
+
+ for (i = 0; i < c->vLumBufSize; i++) {
+ c->lumPixBuf[i + c->vLumBufSize] = (int16_t *)malloc( dst_stride + 16);
+ if(c->lumPixBuf[i + c->vLumBufSize] == NULL && ( dst_stride + 16) != 0)
+ {
+ printf("Can't alloc memory lumPixBuf[]\n");
+ goto fail;
+ }
+ c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
+ }
+
+ c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
+ c->uv_offx2 = dst_stride + 16;
+ for (i = 0; i < c->vChrBufSize; i++) {
+ c->chrUPixBuf[i + c->vChrBufSize] = (int16_t *)av_malloc( dst_stride * 2 + 32);
+ if(c->chrUPixBuf[i + c->vChrBufSize] == NULL && ( dst_stride * 2 + 32) != 0)
+ {
+ printf("Can't alloc memory chrUPixBuf[]\n");
+ goto fail;
+ }
+ c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
+ c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
+ = c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
+ }
+
+ // try to avoid drawing green stuff between the right end and the stride end
+ for (i = 0; i < c->vChrBufSize; i++)
+ if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
+ assert(c->dstBpc > 10);
+ for(j=0; j<dst_stride/2+1; j++)
+ ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
+ } else
+ for(j=0; j<dst_stride+1; j++)
+ ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
+
+ assert(c->chrDstH <= dstH);
+*/
+ if (flags & SWS_PRINT_INFO) {
+ printf("I have delete it, Haha\n");
+ }
+
+// sws_init_swScale_c(c);
+ return 0;
+fail:
+ return -1;
+}
+
+ScaleContext *scale_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
+ int dstW, int dstH, enum PixelFormat dstFormat,
+ int flags, ScaleFilter *srcFilter,
+ ScaleFilter *dstFilter, const double *param)
+{
+ ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext));
+ sc->flags = flags;
+ sc->srcW = srcW;
+ sc->srcH = srcH;
+ sc->dstW = dstW;
+ sc->dstH = dstH;
+ sc->srcRange = handle_scale_jpeg(&srcFormat);
+ sc->dstRange = handle_scale_jpeg(&dstFormat);
+ sc->srcFormat = srcFormat;
+ sc->dstFormat = dstFormat;
+ sc->hyscale_fast = 0;
+ sc->hcscale_fast = 0;
+
+ if (param) {
+ sc->param[0] = param[0];
+ sc->param[1] = param[1];
+ }
+
+ if (scale_init_context(sc, srcFilter, dstFilter) < 0) {
+ sws_freeContext(sc);
+ return NULL;
+ }
+
+ return sc;
+}
+
+int scale_opencl(ScaleContext *c,
+ void *cl_inbuf,
+ void *cl_outbuf,
+ int *srcStride,
+ int *dstStride)
+{
+ int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
+
+ av_scale_frame(c,cl_outbuf,cl_inbuf,srcStride,dstStride,&should_dither);
+
+ return 1;
+}
+
+void scale_init( int width, int height, int dstwidth, int dstheight )
+{
+ int srcW = width;
+ int srcH = height;
+ int dstW = dstwidth;
+ int dstH = dstheight;
+ enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P;
+ enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P;
+ int flags = SWS_BILINEAR;
+
+ g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL);
+}
+
+void scale_release()
+{
+ sws_freeContext( g_scale );
+}
+#ifdef USE_OPENCL
+int scale_run( cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height )
+{
+ g_scale->cl_src = inbuf;
+ g_scale->cl_dst = outbuf;
+
+ int src_stride[4] = { linesizey, linesizeuv, linesizeuv, 0 };
+ int dst_stride[4] = { g_scale->dstW, g_scale->chrDstW, g_scale->chrDstW, 0 };
+ int ret = -1;
+
+ ret = scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride );
+
+ return ret;
+}
+#endif
+#endif
diff --git a/libhb/scale.h b/libhb/scale.h
new file mode 100644
index 000000000..a28ccdf38
--- /dev/null
+++ b/libhb/scale.h
@@ -0,0 +1,310 @@
+#ifndef SCALE_H
+#define SCALE_H
+#ifdef USE_OPENCL
+#include <stdint.h>
+#include "vadxva2.h"
+#include "libavutil/pixfmt.h"
+#include "hbffmpeg.h"
+
+#define YUVRGB_TABLE_HEADROOM 128
+#define MAX_FILTER_SIZE 256
+#define is16BPS(x) \
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15)
+
+#define is9_OR_10BPS(x) \
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \
+ av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9)
+
+#if ARCH_X86_64
+# define APCK_PTR2 8
+# define APCK_COEF 16
+# define APCK_SIZE 24
+#else
+# define APCK_PTR2 4
+# define APCK_COEF 8
+# define APCK_SIZE 16
+#endif
+
+typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
+
+typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
+
+typedef void (*yuv2interleavedX_fn)(struct ScaleContext *c,
+ const int16_t *chrFilter,
+ int chrFilterSize,
+ const int16_t **chrUSrc,
+ const int16_t **chrVSrc,
+ uint8_t *dest, int dstW);
+
+typedef void (*yuv2packed1_fn)(struct ScaleContext *c, const int16_t *lumSrc,
+ const int16_t *chrUSrc[2],
+ const int16_t *chrVSrc[2],
+ const int16_t *alpSrc, uint8_t *dest,
+ int dstW, int uvalpha, int y);
+
+typedef void (*yuv2packed2_fn)(struct ScaleContext *c, const int16_t *lumSrc[2],
+ const int16_t *chrUSrc[2],
+ const int16_t *chrVSrc[2],
+ const int16_t *alpSrc[2],
+ uint8_t *dest,
+ int dstW, int yalpha, int uvalpha, int y);
+
+typedef void (*yuv2packedX_fn)(struct ScaleContext *c, const int16_t *lumFilter,
+ const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter,
+ const int16_t **chrUSrc,
+ const int16_t **chrVSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest,
+ int dstW, int y);
+
+typedef int (*SwsFunc)(struct ScaleContext *context, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[]);
+
+typedef struct {
+ double *coeff; ///< pointer to the list of coefficients
+ int length; ///< number of coefficients in the vector
+} ScaleVector;
+
+typedef struct {
+ ScaleVector *lumH;
+ ScaleVector *lumV;
+ ScaleVector *chrH;
+ ScaleVector *chrV;
+} ScaleFilter;
+
+typedef struct ScaleContext {
+ SwsFunc swScale;
+ int srcW; ///< Width of source luma/alpha planes.
+ int srcH; ///< Height of source luma/alpha planes.
+ int dstH; ///< Height of destination luma/alpha planes.
+ int chrSrcW; ///< Width of source chroma planes.
+ int chrSrcH; ///< Height of source chroma planes.
+ int chrDstW; ///< Width of destination chroma planes.
+ int chrDstH; ///< Height of destination chroma planes.
+ int lumXInc, chrXInc;
+ int lumYInc, chrYInc;
+ enum PixelFormat dstFormat; ///< Destination pixel format.
+ enum PixelFormat srcFormat; ///< Source pixel format.
+ int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
+ int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
+ int dstBpc, srcBpc;
+ int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
+ int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
+ int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
+ int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
+ int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
+ int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
+ double param[2]; ///< Input parameters for scaling algorithms that need them.
+
+ uint32_t pal_yuv[256];
+ uint32_t pal_rgb[256];
+
+ int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
+ int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+ int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+ int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
+ int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
+ int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
+ int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
+ int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
+ int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
+ int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
+
+ uint8_t *formatConvBuffer;
+ int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
+ int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
+ int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
+ int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
+ int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
+ int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
+ int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
+ int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
+ int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
+ int vChrFilterSize; ///< Vertical filter size for chroma pixels.
+
+ int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
+ int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
+ uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
+ uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
+
+ int canMMX2BeUsed;
+
+ unsigned char *dest;
+ unsigned char *source;
+
+ int dstY; ///< Last destination vertical line output from last slice.
+ int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
+ void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
+ uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+
+ //Colorspace stuff
+ int contrast, brightness, saturation; // for sws_getColorspaceDetails
+ int srcColorspaceTable[4];
+ int dstColorspaceTable[4];
+ int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
+ int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
+ int src0Alpha;
+ int dst0Alpha;
+ int yuv2rgb_y_offset;
+ int yuv2rgb_y_coeff;
+ int yuv2rgb_v2r_coeff;
+ int yuv2rgb_v2g_coeff;
+ int yuv2rgb_u2g_coeff;
+ int yuv2rgb_u2b_coeff;
+
+#define RED_DITHER "0*8"
+#define GREEN_DITHER "1*8"
+#define BLUE_DITHER "2*8"
+#define Y_COEFF "3*8"
+#define VR_COEFF "4*8"
+#define UB_COEFF "5*8"
+#define VG_COEFF "6*8"
+#define UG_COEFF "7*8"
+#define Y_OFFSET "8*8"
+#define U_OFFSET "9*8"
+#define V_OFFSET "10*8"
+#define LUM_MMX_FILTER_OFFSET "11*8"
+#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
+#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
+#define ESP_OFFSET "11*8+4*4*256*2+8"
+#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
+#define U_TEMP "11*8+4*4*256*2+24"
+#define V_TEMP "11*8+4*4*256*2+32"
+#define Y_TEMP "11*8+4*4*256*2+40"
+#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
+#define UV_OFF_PX "11*8+4*4*256*3+48"
+#define UV_OFF_BYTE "11*8+4*4*256*3+56"
+#define DITHER16 "11*8+4*4*256*3+64"
+#define DITHER32 "11*8+4*4*256*3+80"
+
+ DECLARE_ALIGNED(8, uint64_t, redDither);
+ DECLARE_ALIGNED(8, uint64_t, greenDither);
+ DECLARE_ALIGNED(8, uint64_t, blueDither);
+
+ DECLARE_ALIGNED(8, uint64_t, yCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vrCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ubCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vgCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ugCoeff);
+ DECLARE_ALIGNED(8, uint64_t, yOffset);
+ DECLARE_ALIGNED(8, uint64_t, uOffset);
+ DECLARE_ALIGNED(8, uint64_t, vOffset);
+ int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
+ int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
+ int dstW; ///< Width of destination luma/alpha planes.
+ DECLARE_ALIGNED(8, uint64_t, esp);
+ DECLARE_ALIGNED(8, uint64_t, vRounder);
+ DECLARE_ALIGNED(8, uint64_t, u_temp);
+ DECLARE_ALIGNED(8, uint64_t, v_temp);
+ DECLARE_ALIGNED(8, uint64_t, y_temp);
+ int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
+
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
+ DECLARE_ALIGNED(8, uint16_t, dither16)[8];
+ DECLARE_ALIGNED(8, uint32_t, dither32)[8];
+
+ const uint8_t *chrDither8, *lumDither8;
+
+#if HAVE_ALTIVEC
+ vector signed short CY;
+ vector signed short CRV;
+ vector signed short CBU;
+ vector signed short CGU;
+ vector signed short CGV;
+ vector signed short OY;
+ vector unsigned short CSHIFT;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
+#endif
+
+#if ARCH_BFIN
+ DECLARE_ALIGNED(4, uint32_t, oy);
+ DECLARE_ALIGNED(4, uint32_t, oc);
+ DECLARE_ALIGNED(4, uint32_t, zero);
+ DECLARE_ALIGNED(4, uint32_t, cy);
+ DECLARE_ALIGNED(4, uint32_t, crv);
+ DECLARE_ALIGNED(4, uint32_t, rmask);
+ DECLARE_ALIGNED(4, uint32_t, cbu);
+ DECLARE_ALIGNED(4, uint32_t, bmask);
+ DECLARE_ALIGNED(4, uint32_t, cgu);
+ DECLARE_ALIGNED(4, uint32_t, cgv);
+ DECLARE_ALIGNED(4, uint32_t, gmask);
+#endif
+
+#if HAVE_VIS
+ DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
+#endif
+ int use_mmx_vfilter;
+
+ /* function pointers for swScale() */
+ yuv2planar1_fn yuv2plane1;
+ yuv2planarX_fn yuv2planeX;
+ yuv2interleavedX_fn yuv2nv12cX;
+ yuv2packed1_fn yuv2packed1;
+ yuv2packed2_fn yuv2packed2;
+ yuv2packedX_fn yuv2packedX;
+
+ /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
+ void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+ /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+ void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+ /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+ void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+
+ void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
+ void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
+ int width);
+
+    void (*hyscale_fast)(struct ScaleContext *c,
+ int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc);
+    void (*hcscale_fast)(struct ScaleContext *c,
+ int16_t *dst1, int16_t *dst2, int dstWidth,
+ const uint8_t *src1, const uint8_t *src2,
+ int srcW, int xInc);
+
+    void (*hyScale)(struct ScaleContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+    void (*hcScale)(struct ScaleContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+
+ void (*lumConvertRange)(int16_t *dst, int width);
+ void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
+
+ int needs_hcscale; ///< Set if there are chroma planes to be converted.
+
+ cl_mem cl_hLumFilter;
+ cl_mem cl_hLumFilterPos;
+ cl_mem cl_hChrFilter;
+ cl_mem cl_hChrFilterPos;
+ cl_mem cl_vLumFilter;
+ cl_mem cl_vLumFilterPos;
+ cl_mem cl_vChrFilter;
+ cl_mem cl_vChrFilterPos;
+
+ cl_mem cl_intermediaBuf;
+
+ cl_mem cl_src;
+ cl_mem cl_dst;
+} ScaleContext;
+
+void scale_init(int, int, int, int);
+void scale_release();
+int scale_run(cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height);
+#endif
+#endif
diff --git a/libhb/scale_kernel.c b/libhb/scale_kernel.c
new file mode 100644
index 000000000..4d0b285c3
--- /dev/null
+++ b/libhb/scale_kernel.c
@@ -0,0 +1,215 @@
+
+#ifdef USE_OPENCL
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <windows.h>
+#include <time.h>
+#include "scale.h"
+#include "openclwrapper.h"
+
+#define OCLCHECK( method, ...) \
+ status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \
+ printf(" error %s %d\n",# method, status); assert(0); return status; }
+
+#define CREATEBUF( out, flags, size, ptr)\
+ out = clCreateBuffer( kenv->context, (flags), (size), ptr, &status );\
+ if( status != CL_SUCCESS ) { printf( "clCreateBuffer faild %d\n", status ); return -1; }
+
+ #define CL_PARAM_NUM 20
+
+/****************************************************************************************************************************/
+/*************************Combine the hscale and yuv2plane into scaling******************************************************/
+/****************************************************************************************************************************/
+static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv )
+{
+ cl_int status;
+
+ if(!c->hyscale_fast || !c->hcscale_fast)
+ {
+ CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter);
+ CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos);
+ CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter);
+ CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos);
+ }
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ {
+ CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter);
+ CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter);
+ }
+ CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos);
+ CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos);
+
+ return 1;
+}
+
+int av_scale_frame_func( void **userdata, KernelEnv *kenv )
+{
+ ScaleContext *c = (ScaleContext *)userdata[0];
+
+ c->cl_src = (cl_mem)userdata[2];
+ c->cl_dst = (cl_mem)userdata[1];
+
+ /*frame size*/
+ int *tmp = (int *)userdata[3];
+ int srcStride = tmp[0];
+ int srcChrStride = tmp[1];
+ int srcW = c->srcW;
+ int srcH = c->srcH;
+
+ tmp = (int *)userdata[4];
+ int dstStride = tmp[0];
+ int dstChrStride = tmp[1];
+ int dstW = c->dstW;
+ int dstH = c->dstH;
+
+ /* local variable */
+ cl_int status;
+ size_t global_work_size[2];
+
+ int intermediaSize;
+
+ int st = CreateCLBuffer(c,kenv);
+ if( !st )
+ {
+ printf( "CreateBuffer[%s] faild %d\n", "scale_opencl",st );
+ return -1;
+ }
+
+ intermediaSize = dstStride * srcH + dstChrStride * srcH;
+
+ CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL);
+
+ static int init_chr_status = 0;
+ static cl_kernel chr_kernel;
+
+ if(init_chr_status == 0){
+
+ if(!(c->flags & 1))
+ {
+ chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );
+ //Set the Kernel Argument;
+ OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter);
+ OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos);
+ OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize);
+ OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter);
+ OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos);
+ OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize);
+ }
+
+ /*Set the arguments*/
+ OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride);
+ init_chr_status = 1;
+ }
+
+ kenv->kernel = chr_kernel;
+ OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);
+ OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src);
+ /*Run the Kernel*/
+ global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;
+ global_work_size[1] = c->chrSrcH;
+
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+
+ static int init_lum_status = 0;
+ static cl_kernel lum_kernel;
+
+ if( init_lum_status == 0 ){
+ //Vertical:
+ /*Create Kernel*/
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );
+ else
+ lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );
+
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ {
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter);
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize);
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter);
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize);
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH);
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride);
+ }else{
+
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH);
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride);
+ }
+ init_lum_status = 1;
+ }
+
+ kenv->kernel = lum_kernel;
+ OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst);
+ OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);
+
+ /*Run the Kernel*/
+ global_work_size[0] = c->chrDstW;
+ global_work_size[1] = c->chrDstH;
+
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
+
+ clReleaseMemObject( c->cl_intermediaBuf );
+
+ return 1;
+}
+
+void av_scale_frame(ScaleContext *c,
+ void *dst,
+ void *src,
+ int *srcStride,
+ int *dstStride,
+ int *should_dither)
+{
+
+ static int regflg = 0;
+ void *userdata[CL_PARAM_NUM];
+ userdata[0] = (void *)c;
+ userdata[1] = (void *)dst;
+ userdata[2] = (void *)src;
+ userdata[3] = (void *)srcStride;
+ userdata[4] = (void *)dstStride;
+ userdata[5] = (void *)should_dither;
+
+ if( regflg==0 )
+ {
+ int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func);
+ if( !st )
+ {
+ printf( "register kernel[%s] faild %d\n", "scale_opencl",st );
+ return;
+ }
+ regflg++;
+ }
+
+ if( !hb_run_kernel( "scale_opencl", userdata ))
+ {
+ printf("run kernel function[%s] faild\n", "scale_opencl_func" );
+ return;
+ }
+}
+
+#endif
diff --git a/libhb/scale_kernel.h b/libhb/scale_kernel.h
new file mode 100644
index 000000000..29562bdaf
--- /dev/null
+++ b/libhb/scale_kernel.h
@@ -0,0 +1,6 @@
+#ifndef _H_SCALE_KERNEL_H
+#define _H_SCALE_KERNEL_H
+#ifdef USE_OPENCL
+void av_scale_frame(ScaleContext *c, void *dst, void *src, int *srcStride, int *dstStride, int *should_dither);
+#endif
+#endif
diff --git a/libhb/stream.c b/libhb/stream.c
index 46b4cb71e..0790394a8 100644
--- a/libhb/stream.c
+++ b/libhb/stream.c
@@ -610,7 +610,7 @@ static int hb_stream_get_type(hb_stream_t *stream)
if ( fread(buf, 1, sizeof(buf), stream->file_handle) == sizeof(buf) )
{
-#ifdef USE_OPENCL
+#ifdef USE_HWD
if ( hb_get_gui_info(&hb_gui, 1) || (hb_get_gui_info(&hb_gui, 3) == 0) )
return 0;
#endif
@@ -1101,23 +1101,26 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title)
{
hb_log( "transport stream missing PCRs - using video DTS instead" );
}
-#ifdef USE_OPENCL
+#ifdef USE_HWD
hb_va_dxva2_t * dxva2 = NULL;
dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param );
if (dxva2)
{
- title->uvd_support = 1;
+ title->hwd_support = 1;
hb_va_close(dxva2);
dxva2 = NULL;
}
else
- title->uvd_support = 0;
+ title->hwd_support = 0;
+#else
+ title->hwd_support = 0;
+#endif
+#ifdef USE_OPENCL
if (TestGPU() == 0)
title->opencl_support = 1;
else
title->opencl_support = 0;
#else
- title->uvd_support = 0;
title->opencl_support = 0;
#endif
// Height, width, rate and aspect ratio information is filled in
@@ -5671,24 +5674,26 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title )
chapter->seconds = title->seconds;
hb_list_add( title->list_chapter, chapter );
}
-
-#ifdef USE_OPENCL
+#ifdef USE_HWD
hb_va_dxva2_t * dxva2 = NULL;
dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param );
if (dxva2)
{
- title->uvd_support = 1;
+ title->hwd_support = 1;
hb_va_close(dxva2);
dxva2 = NULL;
}
else
- title->uvd_support = 0;
+ title->hwd_support = 0;
+#else
+ title->hwd_support = 0;
+#endif
+#ifdef USE_OPENCL
if (TestGPU() == 0)
title->opencl_support = 1;
else
title->opencl_support = 0;
#else
- title->uvd_support = 0;
title->opencl_support = 0;
#endif
diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c
index ff6940497..548dfe144 100644
--- a/libhb/vadxva2.c
+++ b/libhb/vadxva2.c
@@ -10,31 +10,18 @@
Li Cao <[email protected]> <http://www.multicorewareinc.com/>
*/
-
#include "vadxva2.h"
+
+#ifdef USE_OPENCL
#include "CL/cl.h"
#include "oclnv12toyuv.h"
+#include "scale.h"
-static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height );
-static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame );
-static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 );
-static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 );
-static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 );
-static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 );
-static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 );
-static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 );
-static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output );
-static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt );
-static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 );
-static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format );
-static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid );
-static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 );
-
-#ifdef USE_OPENCL
int TestGPU()
{
int status = 1;
- unsigned int i;
+ unsigned int i, j;
+ cl_device_id device;
cl_uint numPlatforms = 0;
status = clGetPlatformIDs(0,NULL,&numPlatforms);
if(status != 0)
@@ -59,24 +46,55 @@ int TestGPU()
sizeof (pbuff),
pbuff,
NULL);
- if (status)
- continue;
- status = clGetDeviceIDs(platforms[i],
+ if (status)
+ continue;
+ status = clGetDeviceIDs(platforms[i],
CL_DEVICE_TYPE_GPU ,
0 ,
NULL ,
&numDevices);
- if (status != CL_SUCCESS)
- continue;
- if(numDevices)
- break;
+
+ cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));
+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
+ for (j = 0; j < numDevices; j++)
+ {
+ char dbuff[100];
+ status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL);
+ device = devices[j];
+ if(!strcmp(dbuff, "Advanced Micro Devices, Inc."))
+ {
+ return 0;
+ }
+ }
+
+ if (status != CL_SUCCESS)
+ continue;
+ if( numDevices)
+ break;
}
free(platforms);
}
end:
- return status;
+ return -1;
}
#endif
+
+#ifdef USE_HWD
+
+static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height );
+static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame );
+static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 );
+static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 );
+static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 );
+static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 );
+static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 );
+static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 );
+static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output );
+static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt );
+static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 );
+static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format );
+static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid );
+static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 );
/**
* It destroys a Direct3D device manager
*/
@@ -640,6 +658,35 @@ static void hb_copy_from_nv12( uint8_t *dst, uint8_t *src[2], size_t src_pitch[2
}
}
}
+
+#ifdef USE_OPENCL
+void hb_init_filter( cl_mem src, int srcwidth, int srcheight, uint8_t* dst, int dstwidth, int dstheight, int *crop )
+{
+ T_FilterLink fl = {0};
+ int STEP = srcwidth * srcheight * 3 / 2;
+ int OUTSTEP = dstwidth * dstheight * 3 / 2;
+ int HEIGHT = srcheight;
+ int LINESIZEY = srcwidth;
+ int LINESIZEUV = srcwidth / 2;
+
+ cl_mem cl_outbuf;
+
+ if( !hb_create_buffer( &(cl_outbuf), CL_MEM_WRITE_ONLY, STEP ) )
+ {
+ hb_log("av_create_buffer cl_outbuf Error\n");
+ return;
+ }
+
+ fl.cl_outbuf = cl_outbuf;
+
+ scale_run( src, fl.cl_outbuf, LINESIZEY, LINESIZEUV, HEIGHT );
+
+ hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP );
+ CL_FREE( cl_outbuf );
+
+ return;
+}
+#endif
/**
* lock frame data form surface.
* nv12 to yuv with opencl and with C reference
@@ -671,20 +718,16 @@ int hb_va_extract( hb_va_dxva2_t *dxva2, uint8_t *dst, AVFrame *frame, int job_w
#ifdef USE_OPENCL
if( ( dxva2->width > job_w || dxva2->height > job_h ) && (TestGPU() == 0) && (hb_get_gui_info(&hb_gui, 2) == 1))
{
-/* int i;
- uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 );
- for( i = 0; i < dxva2->height; i++ )
- {
- memcpy( tmp+i*dxva2->width, plane[0]+i*lock.Pitch, dxva2->width );
- if( i<dxva2->height>>1 )
- memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, plane[1]+i*lock.Pitch, dxva2->width );
- }
-*/
hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 );
- //hb_ocl_nv12toyuv( tmp, dxva2->width, dxva2->height, crop, dxva2 );
- hb_ocl_scale( dxva2->cl_mem_yuv, NULL, dst, dxva2->width - ( crop[2] + crop[3] ), dxva2->height - ( crop[0] + crop[1] ), job_w, job_h, os );
- //free( tmp );
- }
+
+ static int init_flag = 0;
+ if(init_flag == 0){
+ scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h );
+ init_flag = 1;
+ }
+
+ hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop );
+ }
else
#endif
{
@@ -785,20 +828,20 @@ void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context )
enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt )
{
int i;
- static const char *ppsz_name[PIX_FMT_NB] =
- {
- [PIX_FMT_VDPAU_H264] = "PIX_FMT_VDPAU_H264",
- [PIX_FMT_VAAPI_IDCT] = "PIX_FMT_VAAPI_IDCT",
- [PIX_FMT_VAAPI_VLD] = "PIX_FMT_VAAPI_VLD",
- [PIX_FMT_VAAPI_MOCO] = "PIX_FMT_VAAPI_MOCO",
- [PIX_FMT_DXVA2_VLD] = "PIX_FMT_DXVA2_VLD",
- [PIX_FMT_YUYV422] = "PIX_FMT_YUYV422",
- [PIX_FMT_YUV420P] = "PIX_FMT_YUV420P",
+ static const char *ppsz_name[AV_PIX_FMT_NB] =
+ {
+ [AV_PIX_FMT_VDPAU_H264] = "AV_PIX_FMT_VDPAU_H264",
+ [AV_PIX_FMT_VAAPI_IDCT] = "AV_PIX_FMT_VAAPI_IDCT",
+ [AV_PIX_FMT_VAAPI_VLD] = "AV_PIX_FMT_VAAPI_VLD",
+ [AV_PIX_FMT_VAAPI_MOCO] = "AV_PIX_FMT_VAAPI_MOCO",
+ [AV_PIX_FMT_DXVA2_VLD] = "AV_PIX_FMT_DXVA2_VLD",
+ [AV_PIX_FMT_YUYV422] = "AV_PIX_FMT_YUYV422",
+ [AV_PIX_FMT_YUV420P] = "AV_PIX_FMT_YUV420P",
};
- for( i = 0; pi_fmt[i] != PIX_FMT_NONE; i++ )
+ for( i = 0; pi_fmt[i] != AV_PIX_FMT_NONE; i++ )
{
hb_log( "dxva2:Available decoder output format %d (%s)", pi_fmt[i], ppsz_name[pi_fmt[i]] ? : "Unknown" );
- if( pi_fmt[i] == PIX_FMT_DXVA2_VLD )
+ if( pi_fmt[i] == AV_PIX_FMT_DXVA2_VLD )
{
return pi_fmt[i];
}
@@ -825,3 +868,4 @@ int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFram
return HB_WORK_OK;
}
+#endif
diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h
index 7a2af4862..eeab5bec8 100644
--- a/libhb/vadxva2.h
+++ b/libhb/vadxva2.h
@@ -16,6 +16,7 @@
#ifndef VA_DXVA2_H
#define VA_DXVA2_H
+#ifdef USE_HWD
#include "hbffmpeg.h"
#include "d3d9.h"
#include "libavcodec/dxva2.h"
@@ -143,6 +144,25 @@ typedef struct
#endif
} hb_va_dxva2_t;
+typedef struct FilterLink_T
+{
+#ifdef USE_OPENCL
+ cl_mem cl_inbuf;
+ cl_mem cl_outbuf;
+#endif
+ uint8_t *mem_inbuf;
+ uint8_t *mem_outbuf;
+ int width;
+ int height;
+ int linesizeY;
+ int linesizeUV;
+ int inmemdataflag;
+ int outmemdataflag;
+ int incldataflag;
+ int outcldataflag;
+ int framenum;
+ int outputSize;
+} T_FilterLink;
static const hb_d3d_format_t d3d_formats[] =
{
@@ -192,3 +212,4 @@ void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame );
void hb_va_close( hb_va_dxva2_t *dxva2 );
#endif
+#endif
diff --git a/libhb/work.c b/libhb/work.c
index c8057f0e4..6625d5ff7 100644
--- a/libhb/work.c
+++ b/libhb/work.c
@@ -554,18 +554,17 @@ static void do_job( hb_job_t * job )
job->list_work = hb_list_init();
hb_log( "starting job" );
-#ifdef USE_OPENCL
- if ( job->use_opencl || job->use_uvd)
+ if ( job->use_opencl || job->use_hwd)
{
- /* init opencl environment */
hb_log( "Using GPU : Yes.\n" );
- job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I.");
+ /* init opencl environment */
+#ifdef USE_OPENCL
+ if ( job->use_opencl )
+ job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I.");
+#endif
}
else
hb_log( "Using GPU : NO.\n" );
-#else
- hb_log( "Using GPU : NO.\n" );
-#endif
/* Look for the scanned subtitle in the existing subtitle list
* select_subtitle implies that we did a scan. */
if( !job->indepth_scan && interjob->select_subtitle )
@@ -1021,10 +1020,11 @@ static void do_job( hb_job_t * job )
title->video_codec_param = AV_CODEC_ID_MPEG2VIDEO;
}
#endif
-#ifdef USE_OPENCL
- if ( /*job->use_opencl &&*/ hb_use_dxva( title ) && (TestGPU() == 0) && job->use_uvd )
- {
- vcodec = WORK_DECAVCODECVACCL;
+#ifdef USE_HWD
+ if ( /*job->use_opencl &&*/ hb_use_dxva( title ) && job->use_hwd )
+ {
+ //vcodec = WORK_DECAVCODECVACCL;
+ job->use_hw_decode = 1;
}
#endif
hb_list_add( job->list_work, ( w = hb_get_work( vcodec ) ) );
diff --git a/make/configure.py b/make/configure.py
index 598ef754b..0fc32de0f 100644
--- a/make/configure.py
+++ b/make/configure.py
@@ -1087,6 +1087,8 @@ def createCLI():
grp.add_option( '--enable-ff-mpeg2', default=False, action='store_true', help=h )
h = IfHost( 'enable OpenCL features', '*-*-*', none=optparse.SUPPRESS_HELP ).value
grp.add_option( '--enable-opencl', default=False, action='store_true', help=h )
+ h = IfHost( 'enable HWD features', '*-*-*', none=optparse.SUPPRESS_HELP ).value
+ grp.add_option( '--enable-hwd', default=False, action='store_true', help=h )
cli.add_option_group( grp )
@@ -1521,6 +1523,7 @@ int main ()
doc.add( 'FEATURE.gst', int( not options.disable_gst ))
doc.add( 'FEATURE.ff.mpeg2', int( options.enable_ff_mpeg2 ))
doc.add( 'FEATURE.opencl', int( options.enable_opencl ))
+ doc.add( 'FEATURE.hwd', int( options.enable_hwd ))
doc.add( 'FEATURE.xcode', int( not (Tools.xcodebuild.fail or options.disable_xcode or options.cross) ))
if not Tools.xcodebuild.fail and not options.disable_xcode:
diff --git a/test/module.defs b/test/module.defs
index 246b62a94..ba7eaf5a4 100644
--- a/test/module.defs
+++ b/test/module.defs
@@ -48,6 +48,9 @@ ifeq (1,$(FEATURE.opencl))
TEST.GCC.l += OpenCL
TEST.GCC.D += USE_OPENCL
endif
+ifeq (1,$(FEATURE.hwd))
+ TEST.GCC.D += USE_HWD
+endif
TEST.GCC.l += pthreadGC2 iconv ws2_32
TEST.GCC.D += PTW32_STATIC_LIB
TEST.GCC.args.extra.exe++ += -static
diff --git a/test/test case/handbrake_test.txt b/test/test case/handbrake_test.txt
index 47fb0dd8e..a0e9397a1 100644
--- a/test/test case/handbrake_test.txt
+++ b/test/test case/handbrake_test.txt
@@ -7,74 +7,61 @@
#- the names are used as test names and file names
# universal
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-preset=fast --x264-profile=baseline --h264-level="3.0" --verbose=1
# iPod
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -I -w 320 -l 240 -e x264 -b 700 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x level=30:bframes=0:weightp=0:cabac=0:ref=1:vbv-maxrate=768:vbv-bufsize=2000:analyse=all:me=umh:no-fast-pskip=1:subq=6:8x8dct=0:trellis=0 --verbose=1
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -I -P -U -w 320 -l 180 --modulus 2 -e x264 -q 22 -r 30 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=baseline --h264-level="1.3" --verbose=1
# iPhone_iPod_Touch
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 320 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
-
-
-# iPhone_4
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -4 -P -U -w 960 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
# iPad
+ -i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1024 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
-
-# Apple_TV
-
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:b-pyramid=none:b-adapt=2:weightb=0:trellis=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1
-
-# Apple_TV_2
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1280 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
-
-# Android_Mid
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 270 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
-
-
-# Android_High
- -i "infile" -t 1 -o "outfile.mp4" -f mp4 -w 720 --loose-anamorphic -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x weightp=0:cabac=0 --verbose=1
+# AppleTV
+ -i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U -w 960 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" -x cabac=0:ref=2:b-pyramid=none:weightb=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1
+# AppleTV2
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
-# Normal_1080p_to_1080p_fixed_qp
--i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# AppleTV3
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="4.0" --verbose=1
+# Android
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 128 -D 0 --gain 0 --audio-fallback ffac3 --x264-profile=main --h264-level="2.2" --verbose=1
-# Normal_1080p_to_720p_fixed_qp
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# Android_Tablet
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 128 -D 0 --gain 0 --audio-fallback ffac3 --x264-profile=main --h264-level="3.1" --verbose=1
-# Normal_1080p_to_1080p_13_mbps
--i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -b 13000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# Normal_to_480p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
+# Normal_to_720p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
-# Normal_1080p_to_720p_6_mbps
- -i "infile" -t 1 -c 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
-# high_1080p_to_1080p_fixed_qp
--i "infile" -t 1 -o "high_1080p.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# Normal_to_1080p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
+# Normal_to_1080p_qp
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1920 -l 1080 --custom-anamorphic --display-width 1920 --keep-display-aspect --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
-# high_1080p_to_720p_fixed qp
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_480p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# high_1080p_to_1080p_13_mbps
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -b 13000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_720p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# high_1080p_to_720p_6mbps
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_1080p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# strange_resolution
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 1078 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
+# High Profile_to_1080p_qp
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 -l 1080 --custom-anamorphic --display-width 720 --pixel-aspect 720:480 --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
#-end of script
diff --git a/test/test case/readme.txt b/test/test case/readme.txt
index 8c182e26c..87c91201f 100644
--- a/test/test case/readme.txt
+++ b/test/test case/readme.txt
@@ -37,7 +37,9 @@ The handbrake_test.txt is a "script" file that contains the individual tests to
blank lines are ignored.
-To enable UVD decoding, add -P -U to each execution line in handbrake_test.txt.
+
+To enable OpenCL, add -P to each execution line in handbrake_test.txt.
+To enable hardware decoding, add -U to each execution line in handbrake_test.txt.
Example:
# universal
-i "infile" -t 1 -o "outfile.m4v" -P -U -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1
diff --git a/test/test.c b/test/test.c
index 82797fd6e..42c53ab8d 100644
--- a/test/test.c
+++ b/test/test.c
@@ -137,7 +137,7 @@ static int64_t stop_at_pts = 0;
static int stop_at_frame = 0;
static uint64_t min_title_duration = 10;
static int use_opencl = 0;
-static int use_uvd = 0;
+static int use_hwd = 0;
/* Exit cleanly on Ctrl-C */
static volatile int die = 0;
@@ -218,7 +218,8 @@ int main( int argc, char ** argv )
h = hb_init( debug, update );
hb_dvd_set_dvdnav( dvdnav );
#ifdef USE_OPENCL
- hb_get_opencl_env();
+ if ( use_opencl )
+ hb_get_opencl_env();
#endif
/* Show version */
fprintf( stderr, "%s - %s - %s\n",
@@ -260,7 +261,7 @@ int main( int argc, char ** argv )
titleindex = 0;
}
- hb_set_gui_info(&hb_gui, use_uvd, use_opencl, titleindex);
+ hb_set_gui_info(&hb_gui, use_hwd, use_opencl, titleindex);
hb_scan( h, input, titleindex, preview_count, store_previews, min_title_duration * 90000LL );
/* Wait... */
@@ -429,10 +430,10 @@ static void PrintTitleInfo( hb_title_t * title, int feature )
fprintf( stderr, " + support opencl: yes\n");
else
fprintf( stderr, " + support opencl: no\n");
- if (title->uvd_support)
- fprintf( stderr, " + support uvd: yes\n");
+ if (title->hwd_support)
+ fprintf( stderr, " + support hwd: yes\n");
else
- fprintf( stderr, " + support uvd: no\n");
+ fprintf( stderr, " + support hwd: no\n");
fprintf( stderr, " + chapters:\n" );
for( i = 0; i < hb_list_count( title->list_chapter ); i++ )
{
@@ -1412,9 +1413,9 @@ static int HandleEvents( hb_handle_t * h )
job->maxWidth = maxWidth;
if (maxHeight)
job->maxHeight = maxHeight;
- if (use_uvd)
+ if (use_hwd)
{
- job->use_uvd = use_uvd;
+ job->use_hwd = use_hwd;
}
switch( anamorphic_mode )
@@ -1588,13 +1589,8 @@ static int HandleEvents( hb_handle_t * h )
filter_str = hb_strdup_printf("%d:%d:%d:%d:%d:%d",
job->width, job->height,
job->crop[0], job->crop[1], job->crop[2], job->crop[3] );
-
-#ifdef USE_OPENCL
- if ( use_opencl )
- filter = hb_filter_init( HB_FILTER_CROP_SCALE_ACCL );
- else
-#endif
- filter = hb_filter_init( HB_FILTER_CROP_SCALE );
+
+ filter = hb_filter_init( HB_FILTER_CROP_SCALE );
hb_add_filter( job, filter, filter_str );
free( filter_str );
@@ -3251,7 +3247,7 @@ static int ParseOptions( int argc, char ** argv )
{ "optimize", no_argument, NULL, 'O' },
{ "ipod-atom", no_argument, NULL, 'I' },
{ "use-opencl", no_argument, NULL, 'P' },
- { "use-uvd", no_argument, NULL, 'U' },
+ { "use-hwd", no_argument, NULL, 'U' },
{ "title", required_argument, NULL, 't' },
{ "min-duration",required_argument, NULL, MIN_DURATION },
@@ -3416,7 +3412,7 @@ static int ParseOptions( int argc, char ** argv )
use_opencl = 1;
break;
case 'U':
- use_uvd = 1;
+ use_hwd = 1;
break;
case 't':
diff --git a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs
index 5c9f166e0..f9053a8bb 100644
--- a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs
+++ b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs
@@ -88,7 +88,7 @@ namespace HandBrake.ApplicationServices.Model
this.IncludeChapterMarkers = task.IncludeChapterMarkers;
this.IPod5GSupport = task.IPod5GSupport;
this.OpenCLSupport = task.OpenCLSupport;
- this.UVDSupport = task.UVDSupport;
+ this.HWDSupport = task.HWDSupport;
this.KeepDisplayAspect = task.KeepDisplayAspect;
this.LargeFile = task.LargeFile;
this.MaxHeight = task.MaxHeight;
@@ -197,9 +197,9 @@ namespace HandBrake.ApplicationServices.Model
public bool OpenCLSupport { get; set; }
/// <summary>
- /// Gets or sets a value indicating whether UVDSupport.
+ /// Gets or sets a value indicating whether HWDSupport.
/// </summary>
- public bool UVDSupport { get; set; }
+ public bool HWDSupport { get; set; }
#endregion
#region Picture
diff --git a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs
index 466a33aa1..00488eac8 100644
--- a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs
+++ b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs
@@ -121,9 +121,9 @@ namespace HandBrake.ApplicationServices.Parsing
public int OpenCLSupport { get; set; }
/// <summary>
- /// Gets or sets the UVD
+ /// Gets or sets the HWD
/// </summary>
- public int UVDSupport { get; set; }
+ public int HWDSupport { get; set; }
#endregion
/// <summary>
@@ -236,14 +236,14 @@ namespace HandBrake.ApplicationServices.Parsing
}
nextLine = output.ReadLine();
- m = Regex.Match(nextLine, @"^ \+ support uvd:");
+ m = Regex.Match(nextLine, @"^ \+ support hwd:");
if (m.Success)
{
- temp = nextLine.Replace("+ support uvd:", string.Empty).Trim();
+ temp = nextLine.Replace("+ support hwd:", string.Empty).Trim();
if (string.Compare(temp, "yes") == 0)
- thisTitle.UVDSupport = 1;
+ thisTitle.HWDSupport = 1;
else
- thisTitle.UVDSupport = 0;
+ thisTitle.HWDSupport = 0;
}
thisTitle.Chapters.AddRange(Chapter.ParseList(output));
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs
index 5cb0d4768..618dd43cc 100644
--- a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs
+++ b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs
@@ -102,7 +102,7 @@ namespace HandBrake.ApplicationServices.Utilities
profile.Height = work.Height.HasValue ? work.Height.Value : 0;
profile.IPod5GSupport = work.IPod5GSupport;
profile.OpenCLGSupport = work.OpenCLSupport;
- profile.UVDSupport = work.UVDSupport;
+ profile.HWDSupport = work.HWDSupport;
profile.IncludeChapterMarkers = work.IncludeChapterMarkers;
profile.KeepDisplayAspect = work.KeepDisplayAspect;
profile.LargeFile = work.LargeFile;
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs
index 260abfe54..e22345134 100644
--- a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs
+++ b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs
@@ -142,7 +142,7 @@ namespace HandBrake.ApplicationServices.Utilities
AddEncodeElement(xmlWriter, "Mp4LargeFile", "integer", parsed.LargeFile ? "1" : "0");
AddEncodeElement(xmlWriter, "Mp4iPodCompatible", "integer", parsed.IPod5GSupport ? "1" : "0");
AddEncodeElement(xmlWriter, "OpenCLSupport", "integer", parsed.OpenCLSupport ? "1" : "0");
- AddEncodeElement(xmlWriter, "UVDSupport", "integer", parsed.UVDSupport ? "1" : "0");
+ AddEncodeElement(xmlWriter, "HWDSupport", "integer", parsed.HWDSupport ? "1" : "0");
AddEncodeElement(xmlWriter, "PictureAutoCrop", "integer", "1");
AddEncodeElement(xmlWriter, "PictureBottomCrop", "integer", parsed.Cropping.Bottom.ToString());
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs
index 69cbfc258..6cdf1d900 100644
--- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs
+++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs
@@ -246,7 +246,7 @@ namespace HandBrake.ApplicationServices.Utilities
if (task.OpenCLSupport)
query += " -P ";
- if (task.UVDSupport)
+ if (task.HWDSupport)
query += " -U ";
return query;
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs
index a167801df..9655d6629 100644
--- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs
+++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs
@@ -57,7 +57,7 @@ namespace HandBrake.ApplicationServices.Utilities
Match largerMp4 = Regex.Match(input, @" -4");
Match ipodAtom = Regex.Match(input, @" -I");
Match openclSupport = Regex.Match(input, @" -P");
- Match uvdSupport = Regex.Match(input, @" -U");
+ Match hwdSupport = Regex.Match(input, @" -U");
// Picture Settings Tab
Match width = Regex.Match(input, @"-w ([0-9]+)");
@@ -158,7 +158,7 @@ namespace HandBrake.ApplicationServices.Utilities
parsed.IPod5GSupport = ipodAtom.Success;
parsed.OptimizeMP4 = optimizeMP4.Success;
parsed.OpenCLSupport = openclSupport.Success;
- parsed.UVDSupport = uvdSupport.Success;
+ parsed.HWDSupport = hwdSupport.Success;
#endregion
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs
index 74f1683a9..06bf022f3 100644
--- a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs
+++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs
@@ -1448,7 +1448,7 @@ namespace HandBrake.Interop
nativeJob.mp4_optimize = profile.Optimize ? 1 : 0;
nativeJob.ipod_atom = profile.IPod5GSupport ? 1 : 0;
nativeJob.opencl_support = profile.OpenCLGSupport ? 1 : 0;
- nativeJob.uvd_support = profile.UVDSupport ? 1 : 0;
+ nativeJob.hwd_support = profile.HWDSupport ? 1 : 0;
if (title.AngleCount > 1)
{
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs
index a761780fa..562531083 100644
--- a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs
+++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs
@@ -149,7 +149,7 @@ namespace HandBrake.Interop.HbLib
public int opencl_support;
/// int
- public int uvd_support;
+ public int hwd_support;
/// int
public int indepth_scan;
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs
index 9667d5e6e..937c776a7 100644
--- a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs
+++ b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs
@@ -28,7 +28,7 @@ namespace HandBrake.Interop.Model.Encoding
public bool Optimize { get; set; }
public bool IPod5GSupport { get; set; }
public bool OpenCLGSupport { get; set; }
- public bool UVDSupport { get; set; }
+ public bool HWDSupport { get; set; }
public int Width { get; set; }
public int Height { get; set; }
@@ -85,7 +85,7 @@ namespace HandBrake.Interop.Model.Encoding
Optimize = this.Optimize,
IPod5GSupport = this.IPod5GSupport,
OpenCLGSupport = this.OpenCLGSupport,
- UVDSupport = this.UVDSupport,
+ HWDSupport = this.HWDSupport,
Width = this.Width,
Height = this.Height,
diff --git a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs
index e814e897c..ad09f3043 100644
--- a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs
+++ b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs
@@ -377,7 +377,7 @@ namespace HandBrakeWPF.ViewModels
this.CurrentTask.OptimizeMP4 = selectedPreset.Task.OptimizeMP4;
this.CurrentTask.IPod5GSupport = selectedPreset.Task.IPod5GSupport;
this.CurrentTask.OpenCLSupport = selectedPreset.Task.OpenCLSupport;
- this.CurrentTask.UVDSupport = selectedPreset.Task.UVDSupport;
+ this.CurrentTask.HWDSupport = selectedPreset.Task.HWDSupport;
this.SelectedOutputFormat = selectedPreset.Task.OutputFormat;
// Tab Settings
@@ -1697,7 +1697,7 @@ namespace HandBrakeWPF.ViewModels
{
this.SupportOpenCL = false;
}
- if (this.selectedTitle.UVDSupport == 0)
+ if (this.selectedTitle.HWDSupport == 0)
{
this.SupportHardwareDecoding = true;
}
diff --git a/win/CS/HandBrakeWPF/Views/MainView.xaml b/win/CS/HandBrakeWPF/Views/MainView.xaml
index 295ebb4c1..0f85a2b73 100644
--- a/win/CS/HandBrakeWPF/Views/MainView.xaml
+++ b/win/CS/HandBrakeWPF/Views/MainView.xaml
@@ -482,11 +482,11 @@
Converter={StaticResource boolToVisConverter},
ConverterParameter=true}"
/>
- <CheckBox Name="UVD"
+ <CheckBox Name="HWD"
Margin="8,0,0,0"
VerticalAlignment="Center"
- Content="UVD Support"
- IsChecked="{Binding Path=CurrentTask.UVDSupport}" IsEnabled="True"
+ Content="HWD Support"
+ IsChecked="{Binding Path=CurrentTask.HWDSupport}" IsEnabled="True"
Visibility="{Binding SupportHardwareDecoding,
Converter={StaticResource boolToVisConverter},
ConverterParameter=true}"