diff options
author | handbrake <[email protected]> | 2013-01-31 08:18:55 +0000 |
---|---|---|
committer | handbrake <[email protected]> | 2013-01-31 08:18:55 +0000 |
commit | bc3b56381c6ac214b7736a00972c9d0902902453 (patch) | |
tree | 6caffbe88ef2ee5efbd2279cf00556a986a7c22e | |
parent | 39821495c82b104b77e628d5cb244be041e285ed (diff) |
add a new compile option: --enable-hwd
change the GUI uvd checkbox's name to hardware decoder
modify the issue compile on linux
modify the issue on Intel platform with Intel integrate graphic
add a new opencl scale algorithm, PSNR goes up (added files: scale.h, scale.c, scale_kernel.h, scale_kernel.c)
merge the cropscaleaccl.c to cropscale.c
merge the decavcodecaccl.c to decavcodec.c
git-svn-id: svn://svn.handbrake.fr/HandBrake/branches/opencl@5227 b64f7644-9d1e-0410-96f1-a4d463321fa5
41 files changed, 2740 insertions, 1783 deletions
diff --git a/contrib/ffmpeg/module.defs b/contrib/ffmpeg/module.defs index 01fd5f753..435ce778b 100644 --- a/contrib/ffmpeg/module.defs +++ b/contrib/ffmpeg/module.defs @@ -20,11 +20,6 @@ FFMPEG.CONFIGURE.extra = \ --disable-network \ --disable-hwaccels \ --disable-encoders \ - --enable-dxva2 \ - --enable-hwaccel=h264_dxva2 \ - --enable-hwaccel=mpeg2_dxva2 \ - --enable-hwaccel=vc1_dxva2 \ - --enable-hwaccel=wmv3_dxva2 \ --enable-encoder=aac \ --enable-encoder=ac3 \ --enable-encoder=flac \ @@ -47,6 +42,11 @@ else ifeq (1-mingw,$(BUILD.cross)-$(BUILD.system)) FFMPEG.CONFIGURE.extra += \ --enable-w32threads \ --enable-memalign-hack \ + --enable-dxva2 \ + --enable-hwaccel=h264_dxva2 \ + --enable-hwaccel=mpeg2_dxva2 \ + --enable-hwaccel=vc1_dxva2 \ + --enable-hwaccel=wmv3_dxva2 \ --target-os=mingw32 \ --arch=i386 \ --enable-cross-compile --cross-prefix=$(BUILD.cross.prefix) diff --git a/libhb/common.c b/libhb/common.c index eccc00354..c10eae93d 100644 --- a/libhb/common.c +++ b/libhb/common.c @@ -1739,6 +1739,7 @@ static void job_setup( hb_job_t * job, hb_title_t * title ) job->list_attachment = hb_attachment_list_copy( title->list_attachment ); job->metadata = hb_metadata_copy( title->metadata ); + job->use_hw_decode = 0; } static void job_clean( hb_job_t * job ) @@ -1993,11 +1994,7 @@ hb_filter_object_t * hb_filter_init( int filter_id ) case HB_FILTER_CROP_SCALE: filter = &hb_filter_crop_scale; break; -#ifdef USE_OPENCL - case HB_FILTER_CROP_SCALE_ACCL: - filter = &hb_filter_crop_scale_accl; - break; -#endif + case HB_FILTER_ROTATE: filter = &hb_filter_rotate; break; @@ -2974,15 +2971,15 @@ int hb_use_dxva( hb_title_t * title ) int hb_get_gui_info(hb_gui_t * gui, int option) { if ( option == 1 ) - return gui->use_uvd; + return gui->use_hwd; else if ( option == 2 ) return gui->use_opencl; else return gui->title_scan; } -void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan) +void hb_set_gui_info(hb_gui_t *gui, int hwd, int opencl, int titlescan) { - 
gui->use_uvd = uvd; + gui->use_hwd = hwd; gui->use_opencl = opencl; gui->title_scan = titlescan; } diff --git a/libhb/common.h b/libhb/common.h index 3d5f38f15..8586d2a6b 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -148,10 +148,10 @@ int hb_subtitle_can_pass( int source, int mux ); hb_attachment_t *hb_attachment_copy(const hb_attachment_t *src); int hb_get_gui_info(hb_gui_t *gui, int option); -void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan); +void hb_set_gui_info(hb_gui_t *gui, int hwd, int opencl, int titlescan); struct hb_gui_s { - int use_uvd; + int use_hwd; int use_opencl; int title_scan; }; @@ -441,7 +441,8 @@ struct hb_job_s // initially (for frame accurate positioning // to non-I frames). int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */ - int use_uvd; + int use_hwd; + int use_hw_decode; #ifdef __LIBHB__ /* Internal data */ @@ -786,7 +787,7 @@ struct hb_title_s uint32_t flags; // set if video stream doesn't have IDR frames int opencl_support; - int uvd_support; + int hwd_support; #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 0) }; @@ -1086,9 +1087,7 @@ enum HB_FILTER_DENOISE, HB_FILTER_RENDER_SUB, HB_FILTER_CROP_SCALE, -#ifdef USE_OPENCL - HB_FILTER_CROP_SCALE_ACCL, -#endif + // Finally filters that don't care what order they are in, // except that they must be after the above filters HB_FILTER_ROTATE, diff --git a/libhb/cropscale.c b/libhb/cropscale.c index 61aa85101..e0a3e2af3 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -9,6 +9,8 @@ #include "hb.h" #include "hbffmpeg.h" +#include "common.h" + struct hb_filter_private_s { @@ -19,7 +21,13 @@ struct hb_filter_private_s int width_out; int height_out; int crop[4]; + +#ifdef USE_OPENCL int use_dxva; + int title_width; + int title_height; + hb_oclscale_t * os; //ocl scaler handler +#endif struct SwsContext * context; }; @@ -59,6 +67,16 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, pv->height_in = init->height; 
pv->width_out = init->width; pv->height_out = init->height; +#ifdef USE_OPENCL + pv->use_dxva = init->use_dxva; + if ( hb_get_gui_info(&hb_gui, 2) ) + { + pv->title_width = init->title_width; + pv->title_height = init->title_height; + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + } +#endif memcpy( pv->crop, init->crop, sizeof( int[4] ) ); if( filter->settings ) { @@ -95,6 +113,30 @@ static int hb_crop_scale_info( hb_filter_object_t * filter, info->out.height = pv->height_out; memcpy( info->out.crop, pv->crop, sizeof( int[4] ) ); +#ifdef USE_OPENCL + if ( hb_get_gui_info(&hb_gui, 2) ) + { + int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] ); + int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] ); + + sprintf( info->human_readable_desc, + "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", + pv->title_width, pv->title_height, + pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], + cropped_width, cropped_height, pv->width_out, pv->height_out ); + } + else + { + int cropped_width = pv->width_in - ( pv->crop[2] + pv->crop[3] ); + int cropped_height = pv->height_in - ( pv->crop[0] + pv->crop[1] ); + + sprintf( info->human_readable_desc, + "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", + pv->width_in, pv->height_in, + pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], + cropped_width, cropped_height, pv->width_out, pv->height_out ); + } +#else int cropped_width = pv->width_in - ( pv->crop[2] + pv->crop[3] ); int cropped_height = pv->height_in - ( pv->crop[0] + pv->crop[1] ); @@ -103,7 +145,7 @@ static int hb_crop_scale_info( hb_filter_object_t * filter, pv->width_in, pv->height_in, pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], cropped_width, cropped_height, pv->width_out, pv->height_out ); - +#endif return 0; } @@ -115,7 +157,23 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) { return; } - +#ifdef USE_OPENCL + if ( 
hb_get_gui_info(&hb_gui, 2) && pv->os) + { + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); + free( pv->os ); + } +#endif if ( pv->context ) { sws_freeContext( pv->context ); @@ -124,7 +182,24 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) free( pv ); filter->private_data = NULL; } - +#ifdef USE_OPENCL +static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) +{ + if( dstride == sstride ) + { + memcpy( dst, src, dstride * h ); + return dst + dstride * h; + } + int lbytes = dstride <= sstride ? dstride : sstride; + while( --h >= 0 ) + { + memcpy( dst, src, lbytes ); + src += sstride; + dst += dstride; + } + return dst; +} +#endif static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) { AVPicture pic_in; @@ -141,6 +216,76 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) av_picture_crop( &pic_crop, &pic_in, in->f.fmt, pv->crop[0], pv->crop[2] ); +#ifdef USE_OPENCL + if ( hb_get_gui_info(&hb_gui, 2) ) + { + int w = in->f.width - ( pv->crop[2] + pv->crop[3] ); + int h = in->f.height - ( pv->crop[0] + pv->crop[1] ); + uint8_t *tmp_in = malloc( w * h * 3 / 2 ); + uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 ); + if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] ) + { + int i; + for( i = 0; i< h>>1; i++ ) + { + memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 ); + 
memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 ); + } + } + else + { + memcpy( tmp_in, pic_crop.data[0], w * h ); + memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 ); + memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 ); + } + hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os ); + w = out->plane[0].stride; + h = out->plane[0].height; + uint8_t *dst = out->plane[0].data; + copy_plane( dst, tmp_out, w, pv->width_out, h ); + w = out->plane[1].stride; + h = out->plane[1].height; + dst = out->plane[1].data; + copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h ); + w = out->plane[2].stride; + h = out->plane[2].height; + dst = out->plane[2].data; + copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h ); + free( tmp_out ); + free( tmp_in ); + } + else + { + if ( !pv->context || + pv->width_in != in->f.width || + pv->height_in != in->f.height || + pv->pix_fmt != in->f.fmt ) + { + // Something changed, need a new scaling context. 
+ if( pv->context ) + sws_freeContext( pv->context ); + pv->context = hb_sws_get_context( + in->f.width - (pv->crop[2] + pv->crop[3]), + in->f.height - (pv->crop[0] + pv->crop[1]), + in->f.fmt, + out->f.width, out->f.height, out->f.fmt, + SWS_LANCZOS | SWS_ACCURATE_RND ); + pv->width_in = in->f.width; + pv->height_in = in->f.height; + pv->pix_fmt = in->f.fmt; + } + + // Scale pic_crop into pic_render according to the + // context set up above + sws_scale(pv->context, + (const uint8_t* const*)pic_crop.data, + pic_crop.linesize, + 0, in->f.height - (pv->crop[0] + pv->crop[1]), + pic_out.data, pic_out.linesize); + } +#else if ( !pv->context || pv->width_in != in->f.width || pv->height_in != in->f.height || @@ -168,7 +313,7 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) pic_crop.linesize, 0, in->f.height - (pv->crop[0] + pv->crop[1]), pic_out.data, pic_out.linesize); - +#endif out->s = in->s; hb_buffer_move_subs( out, in ); return out; @@ -202,6 +347,17 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); } +#ifdef USE_OPENCL + if ( (in->f.fmt == pv->pix_fmt_out && + !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && + in->f.width == pv->width_out && in->f.height == pv->height_out) || + (pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out) ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } +#else if ( in->f.fmt == pv->pix_fmt_out && !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && in->f.width == pv->width_out && in->f.height == pv->height_out ) @@ -210,13 +366,6 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, *buf_in = NULL; return HB_FILTER_OK; } -#ifdef USE_OPENCL - if ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } #endif 
*buf_out = crop_scale( pv, in ); diff --git a/libhb/cropscaleaccl.c b/libhb/cropscaleaccl.c deleted file mode 100644 index b254acde0..000000000 --- a/libhb/cropscaleaccl.c +++ /dev/null @@ -1,262 +0,0 @@ -/* cropscaleaccl.c - - Copyright (c) 2003-2012 HandBrake Team - This file is part of the HandBrake source code - Homepage: <http://handbrake.fr/>. - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> - Li Cao <[email protected]> <http://www.multicorewareinc.com/> - */ -#ifdef USE_OPENCL -#include "hb.h" -#include "common.h" - -struct hb_filter_private_s -{ - int width_in; - int height_in; - int pix_fmt; - int pix_fmt_out; - int width_out; - int height_out; - int crop[4]; - int use_dxva; - - int title_width; - int title_height; - hb_oclscale_t * os; //ocl scaler handler - struct SwsContext * context; -}; - -static int hb_crop_scale_init( hb_filter_object_t * filter, - hb_filter_init_t * init ); - -static int hb_crop_scale_work( hb_filter_object_t * filter, - hb_buffer_t ** buf_in, - hb_buffer_t ** buf_out ); - -static int hb_crop_scale_info( hb_filter_object_t * filter, - hb_filter_info_t * info ); - -static void hb_crop_scale_close( hb_filter_object_t * filter ); - -hb_filter_object_t hb_filter_crop_scale_accl = -{ - .id = HB_FILTER_CROP_SCALE_ACCL, - .enforce_order = 1, - .name = "Custom Crop and Scale", - .settings = NULL, - .init = hb_crop_scale_init, - .work = hb_crop_scale_work, - .close = hb_crop_scale_close, - .info = hb_crop_scale_info, -}; - -static int hb_crop_scale_init( hb_filter_object_t * filter, - hb_filter_init_t * init ) -{ - filter->private_data = calloc( 1, sizeof(struct hb_filter_private_s) ); - hb_filter_private_t * pv = filter->private_data; - - // TODO: add pix format option to settings - pv->pix_fmt_out = init->pix_fmt; - pv->width_in = init->width; - 
pv->height_in = init->height; - pv->width_out = init->width; - pv->height_out = init->height; - pv->use_dxva = init->use_dxva; - pv->title_width = init->title_width; - pv->title_height = init->title_height; - memcpy( pv->crop, init->crop, sizeof( int[4] ) ); - if( filter->settings ) - { - sscanf( filter->settings, "%d:%d:%d:%d:%d:%d", - &pv->width_out, &pv->height_out, - &pv->crop[0], &pv->crop[1], &pv->crop[2], &pv->crop[3] ); - } - // Set init values so the next stage in the pipline - // knows what it will be getting - init->pix_fmt = pv->pix_fmt; - init->width = pv->width_out; - init->height = pv->height_out; - memcpy( init->crop, pv->crop, sizeof( int[4] ) ); - pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); - memset( pv->os, 0, sizeof( hb_oclscale_t ) ); - return 0; -} - -static int hb_crop_scale_info( hb_filter_object_t * filter, - hb_filter_info_t * info ) -{ - hb_filter_private_t * pv = filter->private_data; - - if( !pv ) - return 0; - - // Set init values so the next stage in the pipline - // knows what it will be getting - memset( info, 0, sizeof( hb_filter_info_t ) ); - info->out.pix_fmt = pv->pix_fmt; - info->out.width = pv->width_out; - info->out.height = pv->height_out; - memcpy( info->out.crop, pv->crop, sizeof( int[4] ) ); - - int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] ); - int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] ); - - sprintf( info->human_readable_desc, - "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", - pv->title_width, pv->title_height, - pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], - cropped_width, cropped_height, pv->width_out, pv->height_out ); - - return 0; -} - -static void hb_crop_scale_close( hb_filter_object_t * filter ) -{ - hb_filter_private_t * pv = filter->private_data; - - if( !pv ) - { - return; - } - if ( pv->os ) - { - CL_FREE( pv->os->h_in_buf ); - CL_FREE( pv->os->h_out_buf ); - CL_FREE( pv->os->v_out_buf ); - CL_FREE( pv->os->h_coeff_y ); 
- CL_FREE( pv->os->h_coeff_uv ); - CL_FREE( pv->os->h_index_y ); - CL_FREE( pv->os->h_index_uv ); - CL_FREE( pv->os->v_coeff_y ); - CL_FREE( pv->os->v_coeff_uv ); - CL_FREE( pv->os->v_index_y ); - CL_FREE( pv->os->v_index_uv ); - free( pv->os ); - } - free( pv ); - filter->private_data = NULL; -} - -static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) -{ - if( dstride == sstride ) - { - memcpy( dst, src, dstride * h ); - return dst + dstride * h; - } - int lbytes = dstride <= sstride ? dstride : sstride; - while( --h >= 0 ) - { - memcpy( dst, src, lbytes ); - src += sstride; - dst += dstride; - } - return dst; -} - -static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) -{ - AVPicture pic_in; - AVPicture pic_out; - AVPicture pic_crop; - hb_buffer_t * out; - out = hb_video_buffer_init( pv->width_out, pv->height_out ); - - hb_avpicture_fill( &pic_in, in ); - hb_avpicture_fill( &pic_out, out ); - - // Crop; this alters the pointer to the data to point to the - // correct place for cropped frame - av_picture_crop( &pic_crop, &pic_in, in->f.fmt, - pv->crop[0], pv->crop[2] ); - - - int w = in->f.width - ( pv->crop[2] + pv->crop[3] ); - int h = in->f.height - ( pv->crop[0] + pv->crop[1] ); - uint8_t *tmp_in = malloc( w * h * 3 / 2 ); - uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 ); - if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] ) - { - int i; - for( i = 0; i< h>>1; i++ ) - { - memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w ); - memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w ); - memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 ); - memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 ); - } - } - else - { - memcpy( tmp_in, pic_crop.data[0], w * h 
); - memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 ); - memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 ); - } - hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os ); - w = out->plane[0].stride; - h = out->plane[0].height; - uint8_t *dst = out->plane[0].data; - copy_plane( dst, tmp_out, w, pv->width_out, h ); - w = out->plane[1].stride; - h = out->plane[1].height; - dst = out->plane[1].data; - copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h ); - w = out->plane[2].stride; - h = out->plane[2].height; - dst = out->plane[2].data; - copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h ); - free( tmp_out ); - free( tmp_in ); - out->s = in->s; - hb_buffer_move_subs( out, in ); - return out; -} - -static int hb_crop_scale_work( hb_filter_object_t * filter, - hb_buffer_t ** buf_in, - hb_buffer_t ** buf_out ) -{ - hb_filter_private_t * pv = filter->private_data; - hb_buffer_t * in = *buf_in; - - if( in->size <= 0 ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_DONE; - } - - if( !pv ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } - - // If width or height were not set, set them now based on the - // input width & height - if( pv->width_out <= 0 || pv->height_out <= 0 ) - { - pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); - pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); - } - if( ( in->f.fmt == pv->pix_fmt_out && - !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && - in->f.width == pv->width_out && in->f.height == pv->height_out ) || - ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } - *buf_out = crop_scale( pv, in ); - - - return HB_FILTER_OK; -} -#endif diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c index fe59300fe..43457720a 100644 --- 
a/libhb/decavcodec.c +++ b/libhb/decavcodec.c @@ -40,8 +40,13 @@ #include "hb.h" #include "hbffmpeg.h" +#include "audio_remap.h" #include "audio_resample.h" +#ifdef USE_HWD +#include "vadxva2.h" +#endif + static void compute_frame_duration( hb_work_private_t *pv ); static void flushDelayQueue( hb_work_private_t *pv ); static int decavcodecaInit( hb_work_object_t *, hb_job_t * ); @@ -99,7 +104,11 @@ struct hb_work_private_s int sws_pix_fmt; int cadence[12]; int wait_for_keyframe; - +#ifdef USE_HWD + hb_va_dxva2_t * dxva2; + uint8_t *dst_frame; + hb_oclscale_t *os; +#endif hb_audio_resample_t *resample; }; @@ -264,6 +273,32 @@ static void closePrivData( hb_work_private_t ** ppv ) hb_list_empty( &pv->list ); } hb_audio_resample_free(pv->resample); +#ifdef USE_HWD + if ( pv->os ) + { +#ifdef USE_OPENCL + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); +#endif + free( pv->os ); + } + if ( pv->dxva2 ) + { +#ifdef USE_OPENCL + CL_FREE( pv->dxva2->cl_mem_nv12 ); +#endif + hb_va_close( pv->dxva2 ); + } +#endif free( pv ); } *ppv = NULL; @@ -272,7 +307,9 @@ static void closePrivData( hb_work_private_t ** ppv ) static void decavcodecClose( hb_work_object_t * w ) { hb_work_private_t * pv = w->private_data; - +#ifdef USE_HWD + if( pv->dst_frame ) free( pv->dst_frame ); +#endif if ( pv ) { closePrivData( &pv ); @@ -501,6 +538,48 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) w = pv->job->title->width; h = pv->job->title->height; } +#ifdef USE_HWD + if (pv->dxva2 && pv->job) + { + hb_buffer_t *buf; + int ww, hh; + if( (w > pv->job->width || h > pv->job->height) && (hb_get_gui_info(&hb_gui, 2) == 1) ) + { + buf = hb_video_buffer_init( pv->job->width, 
pv->job->height ); + ww = pv->job->width; + hh = pv->job->height; + } + else + { + buf = hb_video_buffer_init( w, h ); + ww = w; + hh = h; + } + if( !pv->dst_frame ) + { + pv->dst_frame = malloc( ww * hh * 3 / 2 ); + } + if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR ) + { + hb_log( "hb_va_Extract failed!!!!!!" ); + } + w = buf->plane[0].stride; + h = buf->plane[0].height; + uint8_t *dst = buf->plane[0].data; + copy_plane( dst, pv->dst_frame, w, ww, h ); + w = buf->plane[1].stride; + h = buf->plane[1].height; + dst = buf->plane[1].data; + copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h ); + w = buf->plane[2].stride; + h = buf->plane[2].height; + dst = buf->plane[2].data; + copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h ); + return buf; + } + else + { +#endif hb_buffer_t *buf = hb_video_buffer_init( w, h ); uint8_t *dst = buf->data; @@ -547,10 +626,26 @@ static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) copy_plane( dst, frame->data[2], w, frame->linesize[2], h ); } return buf; +#ifdef USE_HWD +} +#endif } static int get_frame_buf( AVCodecContext *context, AVFrame *frame ) { +#ifdef USE_HWD + hb_work_private_t *pv = (hb_work_private_t*)context->opaque; + if ( (pv != NULL) && pv->dxva2 ) + { + int result = HB_WORK_ERROR; + hb_work_private_t *pv = (hb_work_private_t*)context->opaque; + result = hb_va_get_frame_buf( pv->dxva2, context, frame ); + if( result==HB_WORK_ERROR ) + return avcodec_default_get_buffer( context, frame ); + return 0; + } + else +#endif return avcodec_default_get_buffer( context, frame ); } @@ -763,7 +858,18 @@ static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequen { frame_dur += frame.repeat_pict * pv->field_duration; } - +#ifdef USE_HWD + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( avp.pts>0 ) + { + if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 ) + 
frame.pkt_pts = pv->dxva2->input_pts[0]; + else + frame.pkt_pts = pv->dxva2->input_pts[0]<pv->dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1]; + } + } +#endif // If there was no pts for this frame, assume constant frame rate // video & estimate the next frame time from the last & duration. double pts; @@ -949,6 +1055,24 @@ static hb_buffer_t *link_buf_list( hb_work_private_t *pv ) } return head; } +#ifdef USE_HWD +static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame ) +{ + hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque; + int i; + if( p_dec->dxva2 ) + { + hb_va_release( p_dec->dxva2, frame ); + } + else if( !frame->opaque ) + { + if( frame->type == FF_BUFFER_TYPE_INTERNAL ) + avcodec_default_release_buffer( p_context, frame ); + } + for( i = 0; i < 4; i++ ) + frame->data[i] = NULL; +} +#endif static void init_video_avcodec_context( hb_work_private_t *pv ) { @@ -956,6 +1080,10 @@ static void init_video_avcodec_context( hb_work_private_t *pv ) pv->context->opaque = pv; pv->context->get_buffer = get_frame_buf; pv->context->reget_buffer = reget_frame_buf; +#ifdef USE_HWD + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + pv->context->release_buffer = hb_ffmpeg_release_frame_buf; +#endif } static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) @@ -990,7 +1118,27 @@ static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) pv->context->workaround_bugs = FF_BUG_AUTODETECT; pv->context->err_recognition = AV_EF_CRCCHECK; pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; +#ifdef USE_HWD + if( ((w->codec_param==AV_CODEC_ID_H264) + || (w->codec_param==AV_CODEC_ID_MPEG2VIDEO) + || (w->codec_param==AV_CODEC_ID_VC1) + || (w->codec_param==AV_CODEC_ID_WMV3) + || (w->codec_param==AV_CODEC_ID_MPEG4)) + && pv->job && job->use_hw_decode) + { + pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param ); + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + 
hb_va_new_dxva2( pv->dxva2, pv->context ); + init_video_avcodec_context( pv ); + pv->context->get_format = hb_ffmpeg_get_format; + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + pv->threads = 1; + } + } +#endif if ( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) { hb_log( "decavcodecvInit: avcodec_open failed" ); @@ -1180,6 +1328,16 @@ static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in, pv->new_chap = in->s.new_chap; pv->chap_time = pts >= 0? pts : pv->pts_next; } +#ifdef USE_HWD + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[0] = pts; + else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[1] = pts; + pv->dxva2->input_dts = dts; + } +#endif decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); hb_buffer_close( &in ); *buf_out = link_buf_list( pv ); @@ -1421,7 +1579,19 @@ hb_work_object_t hb_decavcodecv = .info = decavcodecvInfo, .bsinfo = decavcodecvBSInfo }; - +#ifdef USE_HWD +hb_work_object_t hb_decavcodecv_accl = +{ + .id = WORK_DECAVCODECVACCL, + .name = "Video hardware decoder (libavcodec)", + .init = decavcodecvInit, + .work = decavcodecvWork, + .close = decavcodecClose, + .flush = decavcodecvFlush, + .info = decavcodecvInfo, + .bsinfo = decavcodecvBSInfo +}; +#endif static void decodeAudio(hb_audio_t *audio, hb_work_private_t *pv, uint8_t *data, int size, int64_t pts) { diff --git a/libhb/decavcodecaccl.c b/libhb/decavcodecaccl.c deleted file mode 100644 index 3842053ec..000000000 --- a/libhb/decavcodecaccl.c +++ /dev/null @@ -1,1311 +0,0 @@ -/* decavcodecaccl.c - - Copyright (c) 2003-2012 HandBrake Team - This file is part of the HandBrake source code - Homepage: <http://handbrake.fr/>. - It may be used under the terms of the GNU General Public License v2. 
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> - Li Cao <[email protected]> <http://www.multicorewareinc.com/> - - */ - -/* This module is Handbrake's interface to the ffmpeg decoder library - (libavcodec & small parts of libavformat). It contains four Handbrake - "work objects": - - decavcodeca connects HB to an ffmpeg audio decoder - decavcodecvaccl connects HB to an ffmpeg video decoder - - (Two different routines are needed because the ffmpeg library - has different decoder calling conventions for audio & video. - These work objects are self-contained & follow all - of HB's conventions for a decoder module. They can be used like - any other HB decoder (deca52, decmpeg2, etc.). - - These decoders handle 2 kinds of input. Streams that are demuxed - by HandBrake and streams that are demuxed by libavformat. In the - case of streams that are demuxed by HandBrake, there is an extra - parse step required that happens in decodeVideo and decavcodecaWork. - In the case of streams that are demuxed by libavformat, there is context - information that we need from the libavformat. This information is - propagated from hb_stream_open to these decoders through title->opaque_priv. - - A consequence of the above is that the streams that are demuxed by HandBrake - *can't* use information from the AVStream because there isn't one - they - get their data from either the dvd reader or the mpeg reader, not the ffmpeg - stream reader. That means that they have to make up for deficiencies in the - AVCodecContext info by using stuff kept in the HB "title" struct. It - also means that ffmpeg codecs that randomly scatter state needed by - the decoder across both the AVCodecContext & the AVStream (e.g., the - VC1 decoder) can't easily be used by the HB mpeg stream reader. 
- */ -#define HAVE_DXVA2 -#ifdef HAVE_DXVA2 -#include "hb.h" -#include "hbffmpeg.h" -#include "vadxva2.h" -#include "audio_remap.h" -#include "audio_resample.h" - -static void compute_frame_duration( hb_work_private_t *pv ); -static void flushDelayQueue( hb_work_private_t *pv ); - -#define HEAP_SIZE 8 -typedef struct { - // there are nheap items on the heap indexed 1..nheap (i.e., top of - // heap is 1). The 0th slot is unused - a marker is put there to check - // for overwrite errs. - int64_t h[HEAP_SIZE+1]; - int nheap; -} pts_heap_t; - -struct hb_work_private_s -{ - hb_job_t *job; - hb_title_t *title; - AVCodecContext *context; - AVCodecParserContext *parser; - int threads; - int video_codec_opened; - hb_list_t *list; - double duration; // frame duration (for video) - double field_duration; // field duration (for video) - int frame_duration_set; // Indicates valid timing was found in stream - double pts_next; // next pts we expect to generate - int64_t chap_time; // time of next chap mark (if new_chap != 0) - int new_chap; // output chapter mark pending - uint32_t nframes; - uint32_t ndrops; - uint32_t decode_errors; - int brokenByMicrosoft; // video stream may contain packed b-frames - hb_buffer_t* delayq[HEAP_SIZE]; - int queue_primed; - pts_heap_t pts_heap; - void* buffer; - struct SwsContext *sws_context; // if we have to rescale or convert color space - int sws_width; - int sws_height; - int sws_pix_fmt; - int cadence[12]; - int wait_for_keyframe; - hb_va_dxva2_t * dxva2; - uint8_t *dst_frame; - hb_oclscale_t *os; - hb_audio_resample_t *resample; -}; - -static hb_buffer_t *link_buf_list( hb_work_private_t *pv ); - - -static int64_t heap_pop( pts_heap_t *heap ) -{ - int64_t result; - - if( heap->nheap <= 0 ) - { - return -1; - } - - // return the top of the heap then put the bottom element on top, - // decrease the heap size by one & rebalence the heap. 
- result = heap->h[1]; - - int64_t v = heap->h[heap->nheap--]; - int parent = 1; - int child = parent << 1; - while( child <= heap->nheap ) - { - // find the smallest of the two children of parent - if (child < heap->nheap && heap->h[child] > heap->h[child+1] ) - ++child; - - if (v <= heap->h[child]) - // new item is smaller than either child so it's the new parent. - break; - - // smallest child is smaller than new item so move it up then - // check its children. - int64_t hp = heap->h[child]; - heap->h[parent] = hp; - parent = child; - child = parent << 1; - } - heap->h[parent] = v; - return result; -} - -static void heap_push( pts_heap_t *heap, int64_t v ) -{ - if ( heap->nheap < HEAP_SIZE ) - { - ++heap->nheap; - } - - // stick the new value on the bottom of the heap then bubble it - // up to its correct spot. - int child = heap->nheap; - while (child > 1) { - int parent = child >> 1; - if (heap->h[parent] <= v) - break; - // move parent down - int64_t hp = heap->h[parent]; - heap->h[child] = hp; - child = parent; - } - heap->h[child] = v; -} - -/*********************************************************************** - * Close - *********************************************************************** - * - **********************************************************************/ -static void closePrivData( hb_work_private_t ** ppv ) -{ - hb_work_private_t * pv = *ppv; - - if ( pv ) - { - flushDelayQueue( pv ); - - if ( pv->job && pv->context && pv->context->codec ) - { - hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops", - pv->context->codec->name, pv->nframes, pv->decode_errors, - pv->ndrops ); - } - if ( pv->sws_context ) - { - sws_freeContext( pv->sws_context ); - } - if ( pv->parser ) - { - av_parser_close(pv->parser); - } - if ( pv->context && pv->context->codec ) - { - hb_avcodec_close( pv->context ); - } - if ( pv->context ) - { - av_freep( &pv->context->extradata ); - av_free( pv->context ); - } - if ( pv->list ) - { - hb_list_empty( 
&pv->list ); - } - - hb_audio_resample_free( pv->resample ); - if ( pv->os ) - { -#ifdef USE_OPENCL - CL_FREE( pv->os->h_in_buf ); - CL_FREE( pv->os->h_out_buf ); - CL_FREE( pv->os->v_out_buf ); - CL_FREE( pv->os->h_coeff_y ); - CL_FREE( pv->os->h_coeff_uv ); - CL_FREE( pv->os->h_index_y ); - CL_FREE( pv->os->h_index_uv ); - CL_FREE( pv->os->v_coeff_y ); - CL_FREE( pv->os->v_coeff_uv ); - CL_FREE( pv->os->v_index_y ); - CL_FREE( pv->os->v_index_uv ); -#endif - free( pv->os ); - } - if ( pv->dxva2 ) - { - -#ifdef USE_OPENCL - CL_FREE( pv->dxva2->cl_mem_nv12 ); -#endif - hb_va_close( pv->dxva2 ); - } - free( pv ); - } - *ppv = NULL; -} - -/* ------------------------------------------------------------- - * General purpose video decoder using libavcodec - */ - -static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, - int h ) -{ - if ( dstride == sstride ) - { - memcpy( dst, src, dstride * h ); - return dst + dstride * h; - } - int lbytes = dstride <= sstride? dstride : sstride; - while ( --h >= 0 ) - { - memcpy( dst, src, lbytes ); - src += sstride; - dst += dstride; - } - return dst; -} - -// copy one video frame into an HB buf. If the frame isn't in our color space -// or at least one of its dimensions is odd, use sws_scale to convert/rescale it. -// Otherwise just copy the bits. -static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) -{ - AVCodecContext *context = pv->context; - int w, h; - if ( ! pv->job ) - { - // HandBrake's video pipeline uses yuv420 color. This means all - // dimensions must be even. So we must adjust the dimensions - // of incoming video if not even. 
- w = context->width & ~1; - h = context->height & ~1; - } - else - { - w = pv->job->title->width; - h = pv->job->title->height; - } - if( pv->dxva2 && pv->job ) - { - hb_buffer_t *buf; - int ww, hh; - if( (w > pv->job->width || h > pv->job->height) && (hb_get_gui_info(&hb_gui, 2) == 1) ) - { - buf = hb_video_buffer_init( pv->job->width, pv->job->height ); - ww = pv->job->width; - hh = pv->job->height; - } - else - { - buf = hb_video_buffer_init( w, h ); - ww = w; - hh = h; - } - if( !pv->dst_frame ) - { - pv->dst_frame = malloc( ww * hh * 3 / 2 ); - } - if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR ) - { - hb_log( "hb_va_Extract failed!!!!!!" ); - } - - w = buf->plane[0].stride; - h = buf->plane[0].height; - uint8_t *dst = buf->plane[0].data; - copy_plane( dst, pv->dst_frame, w, ww, h ); - w = buf->plane[1].stride; - h = buf->plane[1].height; - dst = buf->plane[1].data; - copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h ); - w = buf->plane[2].stride; - h = buf->plane[2].height; - dst = buf->plane[2].data; - copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h ); - return buf; - } - else - { - hb_buffer_t *buf = hb_video_buffer_init( w, h ); - uint8_t *dst = buf->data; - if( context->pix_fmt != AV_PIX_FMT_YUV420P || w != context->width || - h != context->height ) - { - // have to convert to our internal color space and/or rescale - AVPicture dstpic; - hb_avpicture_fill( &dstpic, buf ); - if( !pv->sws_context || - pv->sws_width != context->width || - pv->sws_height != context->height || - pv->sws_pix_fmt != context->pix_fmt ) - { - if( pv->sws_context ) - sws_freeContext( pv->sws_context ); - pv->sws_context = hb_sws_get_context( - context->width, context->height, context->pix_fmt, - w, h, PIX_FMT_YUV420P, - SWS_LANCZOS|SWS_ACCURATE_RND ); - pv->sws_width = context->width; - pv->sws_height = context->height; - pv->sws_pix_fmt = context->pix_fmt; - } - 
sws_scale( pv->sws_context, (const uint8_t*const*)frame->data, - frame->linesize, 0, context->height, - dstpic.data, dstpic.linesize ); - } - else - { - w = buf->plane[0].stride; - h = buf->plane[0].height; - dst = buf->plane[0].data; - copy_plane( dst, frame->data[0], w, frame->linesize[0], h ); - w = buf->plane[1].stride; - h = buf->plane[1].height; - dst = buf->plane[1].data; - copy_plane( dst, frame->data[1], w, frame->linesize[1], h ); - w = buf->plane[2].stride; - h = buf->plane[2].height; - dst = buf->plane[2].data; - copy_plane( dst, frame->data[2], w, frame->linesize[2], h ); - } - return buf; - } - -} - - -static int get_frame_buf( AVCodecContext *context, AVFrame *frame ) -{ - int result = HB_WORK_ERROR; - hb_work_private_t *pv = (hb_work_private_t*)context->opaque; - if( pv->dxva2 ) - { - result = hb_va_get_frame_buf( pv->dxva2, context, frame ); - } - if( result==HB_WORK_ERROR ) - return avcodec_default_get_buffer( context, frame ); - return 0; -} - -static int reget_frame_buf( AVCodecContext *context, AVFrame *frame ) -{ - return avcodec_default_reget_buffer( context, frame ); -} - -static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts ) -{ - hb_chapter_t *c; - - if ( !pv->job ) - return; - - c = hb_list_item( pv->job->list_chapter, chap_num - 1 ); - if ( c && c->title ) - { - hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64, - pv->context->codec->name, c->title, chap_num, pv->nframes, pts ); - } - else - { - hb_log( "%s: Chapter %d at frame %u time %"PRId64, - pv->context->codec->name, chap_num, pv->nframes, pts ); - } -} - -static void flushDelayQueue( hb_work_private_t *pv ) -{ - hb_buffer_t *buf; - int slot = pv->queue_primed ? 
pv->nframes & (HEAP_SIZE-1) : 0; - - // flush all the video packets left on our timestamp-reordering delay q - while ( ( buf = pv->delayq[slot] ) != NULL ) - { - buf->s.start = heap_pop( &pv->pts_heap ); - hb_list_add( pv->list, buf ); - pv->delayq[slot] = NULL; - slot = ( slot + 1 ) & (HEAP_SIZE-1); - } -} - -#define TOP_FIRST PIC_FLAG_TOP_FIELD_FIRST -#define PROGRESSIVE PIC_FLAG_PROGRESSIVE_FRAME -#define REPEAT_FIRST PIC_FLAG_REPEAT_FIRST_FIELD -#define TB 8 -#define BT 16 -#define BT_PROG 32 -#define BTB_PROG 64 -#define TB_PROG 128 -#define TBT_PROG 256 - -static void checkCadence( int * cadence, uint16_t flags, int64_t start ) -{ - /* Rotate the cadence tracking. */ - int i = 0; - for(i=11; i > 0; i--) - { - cadence[i] = cadence[i-1]; - } - - if ( !(flags & PROGRESSIVE) && !(flags & TOP_FIRST) ) - { - /* Not progressive, not top first... - That means it's probably bottom - first, 2 fields displayed. - */ - //hb_log("MPEG2 Flag: Bottom field first, 2 fields displayed."); - cadence[0] = BT; - } - else if ( !(flags & PROGRESSIVE) && (flags & TOP_FIRST) ) - { - /* Not progressive, top is first, - Two fields displayed. - */ - //hb_log("MPEG2 Flag: Top field first, 2 fields displayed."); - cadence[0] = TB; - } - else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) - { - /* Progressive, but noting else. - That means Bottom first, - 2 fields displayed. - */ - //hb_log("MPEG2 Flag: Progressive. Bottom field first, 2 fields displayed."); - cadence[0] = BT_PROG; - } - else if ( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) - { - /* Progressive, and repeat. . - That means Bottom first, - 3 fields displayed. - */ - //hb_log("MPEG2 Flag: Progressive repeat. Bottom field first, 3 fields displayed."); - cadence[0] = BTB_PROG; - } - else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) - { - /* Progressive, top first. - That means top first, - 2 fields displayed. 
- */ - //hb_log("MPEG2 Flag: Progressive. Top field first, 2 fields displayed."); - cadence[0] = TB_PROG; - } - else if ( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) - { - /* Progressive, top, repeat. - That means top first, - 3 fields displayed. - */ - //hb_log("MPEG2 Flag: Progressive repeat. Top field first, 3 fields displayed."); - cadence[0] = TBT_PROG; - } - - if ( (cadence[2] <= TB) && (cadence[1] <= TB) && (cadence[0] > TB) && (cadence[11]) ) - hb_log("%fs: Video -> Film", (float)start / 90000); - if ( (cadence[2] > TB) && (cadence[1] <= TB) && (cadence[0] <= TB) && (cadence[11]) ) - hb_log("%fs: Film -> Video", (float)start / 90000); -} - -/* - * Decodes a video frame from the specified raw packet data - * ('data', 'size', 'sequence'). - * The output of this function is stored in 'pv->list', which contains a list - * of zero or more decoded packets. - * - * The returned packets are guaranteed to have their timestamps in the correct - * order, even if the original packets decoded by libavcodec have misordered - * timestamps, due to the use of 'packed B-frames'. - * - * Internally the set of decoded packets may be buffered in 'pv->delayq' - * until enough packets have been decoded so that the timestamps can be - * correctly rewritten, if this is necessary. - */ -static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) -{ - hb_work_private_t *pv = w->private_data; - int got_picture, oldlevel = 0; - AVFrame frame = { { 0 } }; - AVPacket avp; - - if ( global_verbosity_level <= 1 ) - { - oldlevel = av_log_get_level(); - av_log_set_level( AV_LOG_QUIET ); - } - - av_init_packet(&avp); - avp.data = data; - avp.size = size; - avp.pts = pts; - avp.dts = dts; - /* - * libav avcodec_decode_video2() needs AVPacket flagged with AV_PKT_FLAG_KEY - * for some codecs. For example, sequence of PNG in a mov container. 
- */ - if ( frametype & HB_FRAME_KEY ) - { - avp.flags |= AV_PKT_FLAG_KEY; - } - - if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 ) - { - ++pv->decode_errors; - } - if ( global_verbosity_level <= 1 ) - { - av_log_set_level( oldlevel ); - } - if( got_picture && pv->wait_for_keyframe > 0 ) - { - // Libav is inconsistant about how it flags keyframes. For many - // codecs it simply sets frame.key_frame. But for others, it only - // sets frame.pict_type. And for yet others neither gets set at all - // (qtrle). - int key = frame.key_frame || (w->codec_param != AV_CODEC_ID_H264 && - (frame.pict_type == 0 || - frame.pict_type == AV_PICTURE_TYPE_I)); - if( !key ) - { - pv->wait_for_keyframe--; - return 0; - } - pv->wait_for_keyframe = 0; - } - if( got_picture ) - { - uint16_t flags = 0; - - // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES - // packet had a pts we handed it to av_parser_parse (if the packet had - // no pts we set it to AV_NOPTS_VALUE, but before the parse we can't - // distinguish between the start of a video frame with no pts & an - // intermediate packet of some frame which never has a pts). we hope - // that when parse returns the frame to us the pts we originally - // handed it will be in parser->pts. we put this pts into avp.pts so - // that when avcodec_decode_video finally gets around to allocating an - // AVFrame to hold the decoded frame, avcodec_default_get_buffer can - // stuff that pts into the it. if all of these relays worked at this - // point frame.pts should hold the frame's pts from the original data - // stream or AV_NOPTS_VALUE if it didn't have one. in the latter case - // we generate the next pts in sequence for it. 
- if ( !pv->frame_duration_set ) - compute_frame_duration( pv ); - - double frame_dur = pv->duration; - if ( frame.repeat_pict ) - { - frame_dur += frame.repeat_pict * pv->field_duration; - } - - - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - { - if( avp.pts>0 ) - { - if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 ) - frame.pkt_pts = pv->dxva2->input_pts[0]; - else - frame.pkt_pts = pv->dxva2->input_pts[0]<pv->dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1]; - } - } - // If there was no pts for this frame, assume constant frame rate - // video & estimate the next frame time from the last & duration. - double pts; - if (frame.pkt_pts == AV_NOPTS_VALUE) - { - pts = pv->pts_next; - } - else - { - pts = frame.pkt_pts; - } - pv->pts_next = pts + frame_dur; - - if ( frame.top_field_first ) - { - flags |= PIC_FLAG_TOP_FIELD_FIRST; - } - if ( !frame.interlaced_frame ) - { - flags |= PIC_FLAG_PROGRESSIVE_FRAME; - } - if ( frame.repeat_pict == 1 ) - { - flags |= PIC_FLAG_REPEAT_FIRST_FIELD; - } - if ( frame.repeat_pict == 2 ) - { - flags |= PIC_FLAG_REPEAT_FRAME; - } - - hb_buffer_t *buf; - - // if we're doing a scan or this content couldn't have been broken - // by Microsoft we don't worry about timestamp reordering - if ( ! pv->job || ! pv->brokenByMicrosoft ) - { - buf = copy_frame( pv, &frame ); - buf->s.start = pts; - buf->sequence = sequence; - - buf->s.flags = flags; - - if ( pv->new_chap && buf->s.start >= pv->chap_time ) - { - buf->s.new_chap = pv->new_chap; - log_chapter( pv, pv->new_chap, buf->s.start ); - pv->new_chap = 0; - pv->chap_time = 0; - } - else if ( pv->nframes == 0 && pv->job ) - { - log_chapter( pv, pv->job->chapter_start, buf->s.start ); - } - checkCadence( pv->cadence, flags, buf->s.start ); - hb_list_add( pv->list, buf ); - ++pv->nframes; - return got_picture; - } - - // XXX This following probably addresses a libavcodec bug but I don't - // see an easy fix so we workaround it here. 
- // - // The M$ 'packed B-frames' atrocity results in decoded frames with - // the wrong timestamp. E.g., if there are 2 b-frames the timestamps - // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6". - // The frames are actually delivered in the right order but with - // the wrong timestamp. To get the correct timestamp attached to - // each frame we have a delay queue (longer than the max number of - // b-frames) & a sorting heap for the timestamps. As each frame - // comes out of the decoder the oldest frame in the queue is removed - // and associated with the smallest timestamp. Then the new frame is - // added to the queue & its timestamp is pushed on the heap. - // This does nothing if the timestamps are correct (i.e., the video - // uses a codec that Micro$oft hasn't broken yet) but the frames - // get timestamped correctly even when M$ has munged them. - - // remove the oldest picture from the frame queue (if any) & - // give it the smallest timestamp from our heap. The queue size - // is a power of two so we get the slot of the oldest by masking - // the frame count & this will become the slot of the newest - // once we've removed & processed the oldest. 
- int slot = pv->nframes & (HEAP_SIZE-1); - if ( ( buf = pv->delayq[slot] ) != NULL ) - { - pv->queue_primed = 1; - buf->s.start = heap_pop( &pv->pts_heap ); - - if ( pv->new_chap && buf->s.start >= pv->chap_time ) - { - buf->s.new_chap = pv->new_chap; - log_chapter( pv, pv->new_chap, buf->s.start ); - pv->new_chap = 0; - pv->chap_time = 0; - } - else if ( pv->nframes == 0 && pv->job ) - { - log_chapter( pv, pv->job->chapter_start, buf->s.start ); - } - checkCadence( pv->cadence, buf->s.flags, buf->s.start ); - hb_list_add( pv->list, buf ); - } - - // add the new frame to the delayq & push its timestamp on the heap - buf = copy_frame( pv, &frame ); - buf->sequence = sequence; - /* Store picture flags for later use by filters */ - buf->s.flags = flags; - pv->delayq[slot] = buf; - heap_push( &pv->pts_heap, pts ); - - ++pv->nframes; - } - - return got_picture; -} -static void decodeVideo( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) -{ - hb_work_private_t *pv = w->private_data; - - /* - * The following loop is a do..while because we need to handle both - * data & the flush at the end (signaled by size=0). At the end there's - * generally a frame in the parser & one or more frames in the decoder - * (depending on the bframes setting). 
- */ - int pos = 0; - do { - uint8_t *pout; - int pout_len, len; - int64_t parser_pts, parser_dts; - if ( pv->parser ) - { - len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len, - data + pos, size - pos, pts, dts, 0 ); - parser_pts = pv->parser->pts; - parser_dts = pv->parser->dts; - } - else - { - pout = data; - len = pout_len = size; - parser_pts = pts; - parser_dts = dts; - } - pos += len; - - if ( pout_len > 0 ) - { - decodeFrame( w, pout, pout_len, sequence, parser_pts, parser_dts, frametype ); - } - } while ( pos < size ); - - /* the stuff above flushed the parser, now flush the decoder */ - if ( size <= 0 ) - { - while ( decodeFrame( w, NULL, 0, sequence, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0 ) ) - { - } - flushDelayQueue( pv ); - } -} - -/* - * Removes all packets from 'pv->list', links them together into - * a linked-list, and returns the first packet in the list. - */ -static hb_buffer_t *link_buf_list( hb_work_private_t *pv ) -{ - hb_buffer_t *head = hb_list_item( pv->list, 0 ); - - if ( head ) - { - hb_list_rem( pv->list, head ); - - hb_buffer_t *last = head, *buf; - - while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL ) - { - hb_list_rem( pv->list, buf ); - last->next = buf; - last = buf; - } - } - return head; -} -static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame ) -{ - hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque; - int i; - if( p_dec->dxva2 ) - { - hb_va_release( p_dec->dxva2, frame ); - } - else if( !frame->opaque ) - { - if( frame->type == FF_BUFFER_TYPE_INTERNAL ) - avcodec_default_release_buffer( p_context, frame ); - } - for( i = 0; i < 4; i++ ) - frame->data[i] = NULL; -} - -static void init_video_avcodec_context( hb_work_private_t *pv ) -{ - /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) 
*/ - pv->context->opaque = pv; - pv->context->get_buffer = get_frame_buf; - pv->context->reget_buffer = reget_frame_buf; - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - pv->context->release_buffer = hb_ffmpeg_release_frame_buf; -} - -static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) -{ - - hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) ); - - w->private_data = pv; - pv->wait_for_keyframe = 60; - pv->job = job; - if ( job ) - pv->title = job->title; - else - pv->title = w->title; - pv->list = hb_list_init(); - - if( pv->job && pv->job->title && !pv->job->title->has_resolution_change ) - { - pv->threads = HB_FFMPEG_THREADS_AUTO; - } - if ( pv->title->opaque_priv ) - { - AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; - AVCodec *codec = avcodec_find_decoder( w->codec_param ); - if ( codec == NULL ) - { - hb_log( "decavcodecvInit: failed to find codec for id (%d)", w->codec_param ); - return 1; - } - pv->context = avcodec_alloc_context3(codec); - avcodec_copy_context( pv->context, ic->streams[pv->title->video_id]->codec); - pv->context->workaround_bugs = FF_BUG_AUTODETECT; - pv->context->err_recognition = AV_EF_CRCCHECK; - pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; - if( ((w->codec_param==AV_CODEC_ID_H264) - || (w->codec_param==AV_CODEC_ID_MPEG2VIDEO) - || (w->codec_param==AV_CODEC_ID_VC1) - || (w->codec_param==AV_CODEC_ID_WMV3) - || (w->codec_param==AV_CODEC_ID_MPEG4)) - && pv->job ) - { - pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param ); - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - { - hb_va_new_dxva2( pv->dxva2, pv->context ); - init_video_avcodec_context( pv ); - pv->context->get_format = hb_ffmpeg_get_format; - pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); - memset( pv->os, 0, sizeof( hb_oclscale_t ) ); - pv->threads = 1; - - } - } - if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) - { - hb_log( "decavcodecvInit: avcodec_open failed" ); - 
return 1; - } - pv->video_codec_opened = 1; - // avi, mkv and possibly mp4 containers can contain the M$ VFW packed - // b-frames abortion that messes up frame ordering and timestamps. - // XXX ffmpeg knows which streams are broken but doesn't expose the - // info externally. We should patch ffmpeg to add a flag to the - // codec context for this but until then we mark all ffmpeg streams - // as suspicious. - pv->brokenByMicrosoft = 1; - } - else - { - AVCodec *codec = avcodec_find_decoder( w->codec_param ); - pv->parser = av_parser_init( w->codec_param ); - pv->context = avcodec_alloc_context3( codec ); - pv->context->workaround_bugs = FF_BUG_AUTODETECT; - pv->context->err_recognition = AV_EF_CRCCHECK; - pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; - init_video_avcodec_context( pv ); - } - return 0; -} - -static int next_hdr( hb_buffer_t *in, int offset ) -{ - uint8_t *dat = in->data; - uint16_t last2 = 0xffff; - for ( ; in->size - offset > 1; ++offset ) - { - if ( last2 == 0 && dat[offset] == 0x01 ) - // found an mpeg start code - return offset - 2; - - last2 = ( last2 << 8 ) | dat[offset]; - } - - return -1; -} - -static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type ) -{ - if ( in->size - offset < 4 ) - // not enough room for an mpeg start code - return -1; - - for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset ) - { - if ( in->data[offset+3] == hdr_type ) - // found it - break; - } - return offset; -} - -static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in ) -{ - hb_work_private_t *pv = w->private_data; - - // we can't call the avstream funcs but the read_header func in the - // AVInputFormat may set up some state in the AVContext. In particular - // vc1t_read_header allocates 'extradata' to deal with header issues - // related to Microsoft's bizarre engineering notions. We alloc a chunk - // of space to make vc1 work then associate the codec with the context. 
- if ( w->codec_param != AV_CODEC_ID_VC1 ) - { - // we haven't been inflicted with M$ - allocate a little space as - // a marker and return success. - pv->context->extradata_size = 0; - // av_malloc uses posix_memalign which is allowed to - // return NULL when allocating 0 bytes. We use extradata == NULL - // to trigger initialization of extradata and the decoder, so - // we can not set it to NULL here. So allocate a small - // buffer instead. - pv->context->extradata = av_malloc(1); - return 0; - } - - // find the start and and of the sequence header - int shdr, shdr_end; - if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 ) - { - // didn't find start of seq hdr - return 1; - } - if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 ) - { - shdr_end = in->size; - } - shdr_end -= shdr; - - // find the start and and of the entry point header - int ehdr, ehdr_end; - if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 ) - { - // didn't find start of entry point hdr - return 1; - } - if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 ) - { - ehdr_end = in->size; - } - ehdr_end -= ehdr; - - // found both headers - allocate an extradata big enough to hold both - // then copy them into it. 
- pv->context->extradata_size = shdr_end + ehdr_end; - pv->context->extradata = av_malloc(pv->context->extradata_size + 8); - memcpy( pv->context->extradata, in->data + shdr, shdr_end ); - memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end ); - memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8); - return 0; -} - -static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in, - hb_buffer_t ** buf_out ) -{ - hb_work_private_t *pv = w->private_data; - hb_buffer_t *in = *buf_in; - int64_t pts = AV_NOPTS_VALUE; - int64_t dts = pts; - - *buf_in = NULL; - *buf_out = NULL; - - /* if we got an empty buffer signaling end-of-stream send it downstream */ - if ( in->size == 0 ) - { - if ( pv->context->codec != NULL ) - { - decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); - } - hb_list_add( pv->list, in ); - *buf_out = link_buf_list( pv ); - return HB_WORK_DONE; - } - - // if this is the first frame open the codec (we have to wait for the - // first frame because of M$ VC1 braindamage). - if ( !pv->video_codec_opened ) - { - AVCodec *codec = avcodec_find_decoder( w->codec_param ); - if ( codec == NULL ) - { - hb_log( "decavcodecvWork: failed to find codec for id (%d)", w->codec_param ); - *buf_out = hb_buffer_init( 0 );; - return HB_WORK_DONE; - } - // Note that there is currently a small memory leak in libav at this - // point. pv->context->priv_data gets allocated by - // avcodec_alloc_context3(), then avcodec_get_context_defaults3() - // memsets the context and looses the pointer. - // - // avcodec_get_context_defaults3() looks as if they intended for - // it to preserve any existing priv_data because they test the pointer - // before allocating new memory, but the memset has already cleared it. - avcodec_get_context_defaults3( pv->context, codec ); - init_video_avcodec_context( pv ); - if ( setup_extradata( w, in ) ) - { - // we didn't find the headers needed to set up extradata. 
- // the codec will abort if we open it so just free the buf - // and hope we eventually get the info we need. - hb_buffer_close( &in ); - return HB_WORK_OK; - } - // disable threaded decoding for scan, can cause crashes - if ( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) - { - hb_log( "decavcodecvWork: avcodec_open failed" ); - *buf_out = hb_buffer_init( 0 );; - return HB_WORK_DONE; - } - pv->video_codec_opened = 1; - } - - if( in->s.start >= 0 ) - { - pts = in->s.start; - dts = in->s.renderOffset; - } - if ( in->s.new_chap ) - { - pv->new_chap = in->s.new_chap; - pv->chap_time = pts >= 0? pts : pv->pts_next; - } - if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) - { - if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] ) - pv->dxva2->input_pts[0] = pts; - else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] ) - pv->dxva2->input_pts[1] = pts; - pv->dxva2->input_dts = dts; - } - decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); - hb_buffer_close( &in ); - *buf_out = link_buf_list( pv ); - return HB_WORK_OK; -} - -static void compute_frame_duration( hb_work_private_t *pv ) -{ - double duration = 0.; - int64_t max_fps = 64L; - - // context->time_base may be in fields, so set the max *fields* per second - if ( pv->context->ticks_per_frame > 1 ) - max_fps *= pv->context->ticks_per_frame; - - if ( pv->title->opaque_priv ) - { - // If ffmpeg is demuxing for us, it collects some additional - // information about framerates that is often more accurate - // than context->time_base. - AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; - AVStream *st = ic->streams[pv->title->video_id]; - if ( st->nb_frames && st->duration ) - { - // compute the average frame duration from the total number - // of frames & the total duration. 
- duration = ( (double)st->duration * (double)st->time_base.num ) / - ( (double)st->nb_frames * (double)st->time_base.den ); - } - else - { - // XXX We don't have a frame count or duration so try to use the - // far less reliable time base info in the stream. - // Because the time bases are so screwed up, we only take values - // in the range 8fps - 64fps. - AVRational *tb = NULL; - if ( st->avg_frame_rate.den * 64L > st->avg_frame_rate.num && - st->avg_frame_rate.num > st->avg_frame_rate.den * 8L ) - { - tb = &(st->avg_frame_rate); - duration = (double)tb->den / (double)tb->num; - } - else if ( st->time_base.num * 64L > st->time_base.den && - st->time_base.den > st->time_base.num * 8L ) - { - tb = &(st->time_base); - duration = (double)tb->num / (double)tb->den; - } - else if ( st->r_frame_rate.den * 64L > st->r_frame_rate.num && - st->r_frame_rate.num > st->r_frame_rate.den * 8L ) - { - tb = &(st->r_frame_rate); - duration = (double)tb->den / (double)tb->num; - } - } - if ( !duration && - pv->context->time_base.num * max_fps > pv->context->time_base.den && - pv->context->time_base.den > pv->context->time_base.num * 8L ) - { - duration = (double)pv->context->time_base.num / - (double)pv->context->time_base.den; - if ( pv->context->ticks_per_frame > 1 ) - { - // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is - // field rate rather than frame rate so convert back to frames. - duration *= pv->context->ticks_per_frame; - } - } - } - else - { - if ( pv->context->time_base.num * max_fps > pv->context->time_base.den && - pv->context->time_base.den > pv->context->time_base.num * 8L ) - { - duration = (double)pv->context->time_base.num / - (double)pv->context->time_base.den; - if ( pv->context->ticks_per_frame > 1 ) - { - // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is - // field rate rather than frame rate so convert back to frames. 
- duration *= pv->context->ticks_per_frame; - } - } - } - if ( duration == 0 ) - { - // No valid timing info found in the stream, so pick some value - duration = 1001. / 24000.; - } - else - { - pv->frame_duration_set = 1; - } - pv->duration = duration * 90000.; - pv->field_duration = pv->duration; - if ( pv->context->ticks_per_frame > 1 ) - { - pv->field_duration /= pv->context->ticks_per_frame; - } -} - -static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info ) -{ - hb_work_private_t *pv = w->private_data; - - memset( info, 0, sizeof(*info) ); - - info->bitrate = pv->context->bit_rate; - // HandBrake's video pipeline uses yuv420 color. This means all - // dimensions must be even. So we must adjust the dimensions - // of incoming video if not even. - info->width = pv->context->width & ~1; - info->height = pv->context->height & ~1; - - info->pixel_aspect_width = pv->context->sample_aspect_ratio.num; - info->pixel_aspect_height = pv->context->sample_aspect_ratio.den; - - compute_frame_duration( pv ); - info->rate = 27000000; - info->rate_base = pv->duration * 300.; - - info->profile = pv->context->profile; - info->level = pv->context->level; - info->name = pv->context->codec->name; - - switch( pv->context->color_primaries ) - { - case AVCOL_PRI_BT709: - info->color_prim = HB_COLR_PRI_BT709; - break; - case AVCOL_PRI_BT470BG: - info->color_prim = HB_COLR_PRI_EBUTECH; - break; - case AVCOL_PRI_BT470M: - case AVCOL_PRI_SMPTE170M: - case AVCOL_PRI_SMPTE240M: - info->color_prim = HB_COLR_PRI_SMPTEC; - break; - default: - { - if( ( info->width >= 1280 || info->height >= 720 ) || - ( info->width > 720 && info->height > 576 ) ) - // ITU BT.709 HD content - info->color_prim = HB_COLR_PRI_BT709; - else if( info->rate_base == 1080000 ) - // ITU BT.601 DVD or SD TV content (PAL) - info->color_prim = HB_COLR_PRI_EBUTECH; - else - // ITU BT.601 DVD or SD TV content (NTSC) - info->color_prim = HB_COLR_PRI_SMPTEC; - break; - } - } - - switch( pv->context->color_trc ) 
- { - case AVCOL_TRC_SMPTE240M: - info->color_transfer = HB_COLR_TRA_SMPTE240M; - break; - default: - // ITU BT.601, BT.709, anything else - info->color_transfer = HB_COLR_TRA_BT709; - break; - } - - switch( pv->context->colorspace ) - { - case AVCOL_SPC_BT709: - info->color_matrix = HB_COLR_MAT_BT709; - break; - case AVCOL_SPC_FCC: - case AVCOL_SPC_BT470BG: - case AVCOL_SPC_SMPTE170M: - case AVCOL_SPC_RGB: // libswscale rgb2yuv - info->color_matrix = HB_COLR_MAT_SMPTE170M; - break; - case AVCOL_SPC_SMPTE240M: - info->color_matrix = HB_COLR_MAT_SMPTE240M; - break; - default: - { - if( ( info->width >= 1280 || info->height >= 720 ) || - ( info->width > 720 && info->height > 576 ) ) - // ITU BT.709 HD content - info->color_matrix = HB_COLR_MAT_BT709; - else - // ITU BT.601 DVD or SD TV content (PAL) - // ITU BT.601 DVD or SD TV content (NTSC) - info->color_matrix = HB_COLR_MAT_SMPTE170M; - break; - } - } - - return 1; -} - -static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf, - hb_work_info_t *info ) -{ - return 0; -} - -static void decavcodecvFlush( hb_work_object_t *w ) -{ - hb_work_private_t *pv = w->private_data; - - if ( pv->context->codec ) - { - flushDelayQueue( pv ); - hb_buffer_t *buf = link_buf_list( pv ); - hb_buffer_close( &buf ); - if ( pv->title->opaque_priv == NULL ) - { - pv->video_codec_opened = 0; - hb_avcodec_close( pv->context ); - av_freep( &pv->context->extradata ); - if ( pv->parser ) - { - av_parser_close(pv->parser); - } - pv->parser = av_parser_init( w->codec_param ); - } - else - { - avcodec_flush_buffers( pv->context ); - } - } - pv->wait_for_keyframe = 60; -} - -static void decavcodecClose( hb_work_object_t * w ) -{ - hb_work_private_t * pv = w->private_data; - if( pv->dst_frame ) free( pv->dst_frame ); - if( pv ) - { - closePrivData( &pv ); - w->private_data = NULL; - } -} - -hb_work_object_t hb_decavcodecv_accl = -{ - .id = WORK_DECAVCODECVACCL, - .name = "Video hardware decoder (libavcodec)", - .init = 
decavcodecvInit, - .work = decavcodecvWork, - .close = decavcodecClose, - .flush = decavcodecvFlush, - .info = decavcodecvInfo, - .bsinfo = decavcodecvBSInfo -}; - -#endif diff --git a/libhb/dxva2api.c b/libhb/dxva2api.c index 04011c0c5..004cd681a 100644 --- a/libhb/dxva2api.c +++ b/libhb/dxva2api.c @@ -10,6 +10,7 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ +#ifdef USE_HWD #include "dxva2api.h" __inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ ) @@ -34,3 +35,4 @@ __inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ) _fixed_.Value = HIWORD( _float_ * 0x10000 ); return _fixed_; } +#endif diff --git a/libhb/dxva2api.h b/libhb/dxva2api.h index dc5909477..a64d700fb 100644 --- a/libhb/dxva2api.h +++ b/libhb/dxva2api.h @@ -14,7 +14,7 @@ #ifndef _DXVA2API_H #define _DXVA2API_H - +#ifdef USE_HWD #define MINGW_DXVA2API_H_VERSION (2) #if __GNUC__ >=3 @@ -818,5 +818,5 @@ __inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ ); __inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha(); __inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ); - +#endif #endif //_DXVA2API_H diff --git a/libhb/hb.c b/libhb/hb.c index 63c6c6931..c1188caa2 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -484,7 +484,9 @@ hb_handle_t * hb_init( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); +#ifdef USE_HWD hb_register( &hb_decavcodecv_accl ); +#endif return h; } @@ -583,7 +585,9 @@ hb_handle_t * hb_init_dl( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); +#ifdef USE_HWD hb_register( &hb_decavcodecv_accl ); +#endif return h; } diff --git a/libhb/internal.h b/libhb/internal.h index 9262ff6f8..79ed86f89 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -428,7 +428,6 @@ extern hb_filter_object_t hb_filter_denoise; extern hb_filter_object_t hb_filter_decomb; extern hb_filter_object_t hb_filter_rotate; extern hb_filter_object_t 
hb_filter_crop_scale; -extern hb_filter_object_t hb_filter_crop_scale_accl; extern hb_filter_object_t hb_filter_render_sub; extern hb_filter_object_t hb_filter_vfr; diff --git a/libhb/module.defs b/libhb/module.defs index d3d409513..d679c68f2 100644 --- a/libhb/module.defs +++ b/libhb/module.defs @@ -40,10 +40,14 @@ endif ifeq (1,$(FEATURE.opencl)) LIBHB.GCC.D += USE_OPENCL endif +ifeq (1,$(FEATURE.hwd)) +LIBHB.GCC.D += USE_HWD +endif LIBHB.GCC.D += __LIBHB__ USE_PTHREAD LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include +ifeq (1,$(FEATURE.opencl)) LIBHB.GCC.I += $(AMDAPPSDKROOT)/include - +endif ifeq ($(BUILD.system),cygwin) LIBHB.GCC.D += SYS_CYGWIN else ifeq ($(BUILD.system),darwin) diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c index 8d49563bf..ee0f7661c 100644 --- a/libhb/oclnv12toyuv.c +++ b/libhb/oclnv12toyuv.c @@ -11,6 +11,7 @@ */ #ifdef USE_OPENCL +#ifdef USE_HWD #include "vadxva2.h" #include "oclnv12toyuv.h" @@ -220,3 +221,4 @@ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxv return 0; } #endif +#endif diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h index 3307b8efe..5098d805e 100644 --- a/libhb/oclnv12toyuv.h +++ b/libhb/oclnv12toyuv.h @@ -22,8 +22,8 @@ * nv12 to yuv interface * bufi is input frame of nv12, w is input frame width, h is input frame height */ -//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); +#ifdef USE_HWD int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); - +#endif #endif #endif diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h index 0ab3014ab..8b95ff234 100644 --- a/libhb/openclkernels.h +++ b/libhb/openclkernels.h @@ -119,4 +119,378 @@ char *kernel_src_nvtoyuv = KERNEL( }
);
+/* Stringified OpenCL kernel: full-filter horizontal scale of one planar
+ * 4:2:0 frame.  The source is laid out as the Y plane (srcStride rows,
+ * srcHeight of them) followed by the two chroma planes (srcChrStride).
+ * Each work-item filters one luma sample in each of four quadrants
+ * (w / w+chrWidth columns, top / bottom half rows) plus one U and one V
+ * sample, using precomputed taps (yfilter/yfilterPos for luma,
+ * cfilter/cfilterPos for chroma).  Results are renormalised by >>7 and
+ * saturated to the 15-bit intermediate range consumed by the vertical
+ * pass.
+ * NOTE(review): the single loop runs yfilterSize iterations for the
+ * chroma taps as well -- confirm cfilterSize == yfilterSize on the host
+ * side, otherwise val5/val6 read past the intended chroma taps. */
+char *kernel_src_hscaleall = KERNEL(
+
+ kernel void hscale_all_opencl(
+ global short *dst,
+ const global unsigned char *src,
+ const global short *yfilter,
+ const global int *yfilterPos,
+ int yfilterSize,
+ const global short *cfilter,
+ const global int *cfilterPos,
+ int cfilterSize,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ /* Global work size is a quarter frame, so the global extents double
+ * as the half-width/half-height offsets used below. */
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+
+ /* Read positions: four luma quadrants, then U, then V. */
+ int srcPos1 = h * srcStride + yfilterPos[w];
+ int srcPos2 = h * srcStride + yfilterPos[w + chrWidth];
+ int srcPos3 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w];
+ int srcPos4 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w + chrWidth];
+ int srcc1Pos = srcStride * srcHeight + (h) * (srcChrStride) + cfilterPos[w];
+ int srcc2Pos = srcc1Pos + ((srcChrStride)*(chrHeight));
+
+ int val1 = 0;
+ int val2 = 0;
+ int val3 = 0;
+ int val4 = 0;
+ int val5 = 0;
+ int val6 = 0;
+
+ int filterPos1 = yfilterSize * w;
+ int filterPos2 = yfilterSize * (w + chrWidth);
+ int cfilterPos1 = cfilterSize * w;
+
+ int j;
+ /* Accumulate the FIR taps for all six output samples at once. */
+ for (j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1 + j] * yfilter[filterPos1+ j];
+ val2 += src[srcPos2 + j] * yfilter[filterPos2 + j];
+ val3 += src[srcPos3 + j] * yfilter[filterPos1 + j];
+ val4 += src[srcPos4 + j] * yfilter[filterPos2 + j];
+ val5 += src[srcc1Pos+j] * cfilter[cfilterPos1 + j];
+ val6 += src[srcc2Pos+j] * cfilter[cfilterPos1 + j];
+ }
+ int dstPos1 = h *dstStride;
+ int dstPos2 = (h + chrHeight) * dstStride;
+
+ /* >>7, then saturate at the 15-bit intermediate maximum. */
+ dst[dstPos1 + w] = ((val1 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val1 >> 7));
+ dst[dstPos1 + w + chrWidth] = ((val2 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val2 >> 7));
+ dst[dstPos2 + w] = ((val3 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val3 >> 7));
+ dst[dstPos2 + w + chrWidth] = ((val4 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val4 >> 7));
+
+ /* Chroma planes live after the luma plane in the destination. */
+ int dstPos3 = h * (dstChrStride) + w + dstStride*dstHeight;
+ int dstPos4 = h * (dstChrStride) + w + dstStride*dstHeight + ((dstChrStride)*chrHeight);
+ dst[dstPos3] = ((val5 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val5 >> 7));
+ dst[dstPos4] = ((val6 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val6 >> 7));
+ }
+ );
+
+/* Stringified OpenCL kernel: "fast" (bilinear, no tap table) horizontal
+ * scale.  Each work-item bilinearly interpolates one luma sample in each
+ * of four quadrants plus one U and one V sample, replicating the last
+ * source column once the fixed-point position runs past srcWidth-1.
+ * xInc/chrXInc are 16.16 fixed-point source steps per destination pixel.
+ * NOTE(review): xpos1 and xpos2 are initialised to 0 and never advanced
+ * by xInc, so xx and xalpha are always 0 for the luma taps -- this looks
+ * unfinished; compare with the xpos = chrXInc * w chroma path below.
+ * Confirm against the host-side fast-path setup before relying on it. */
+char *kernel_src_hscalefast = KERNEL(
+
+ kernel void hscale_fast_opencl(
+ global short *dst,
+ const global unsigned char *src,
+ int xInc,
+ int chrXInc,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ int xpos1 = 0;
+ int xpos2 = 0;
+ /* 16.16 fixed point: integer part selects the column, the top 7 bits
+ * of the fraction are the blend weight. */
+ int xx = xpos1 >> 16;
+ int xalpha = (xpos1 & 0xFFFF) >> 9;
+ dst[h * dstStride + w] = (src[h * srcStride + xx] << 7) + (src[h * srcStride + xx + 1] -src[h * srcStride + xx]) * xalpha;
+ int lowpart = h + (chrHeight);
+ dst[lowpart * dstStride + w] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;
+
+ /* Edge replication for the left-half columns. */
+ int inv_i = w * xInc >> 16;
+ if( inv_i >= srcWidth - 1)
+ {
+ dst[h*dstStride + w] = src[h*srcStride + srcWidth-1]*128;
+ dst[lowpart*dstStride + w] = src[lowpart*srcStride + srcWidth - 1] * 128;
+ }
+
+ int rightpart = w + (chrWidth);
+ xx = xpos2 >> 16;
+ xalpha = (xpos2 & 0xFFFF) >> 9;
+ dst[h * dstStride + rightpart] = (src[h *srcStride + xx] << 7) + (src[h * srcStride + xx + 1] - src[h * srcStride + xx]) * xalpha;
+ dst[lowpart * dstStride + rightpart] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;
+ inv_i = rightpart * xInc >> 16;
+ if( inv_i >= srcWidth - 1)
+ {
+ dst[h*dstStride + rightpart] = src[h*srcStride + srcWidth - 1] * 128;
+ dst[lowpart*dstStride + rightpart] = src[lowpart * srcStride + srcWidth - 1] * 128;
+ }
+
+ /* U plane: advance base pointers past the luma plane. */
+ int xpos = 0;
+ xpos = chrXInc * w;
+ xx = xpos >> 16;
+ xalpha = (xpos & 0xFFFF) >> 9;
+ src += srcStride * srcHeight;
+ dst += dstStride * dstHeight;
+ dst[h*(dstChrStride) + w] = (src[h * (srcChrStride) + xx] *(xalpha^127) + src[h * (srcChrStride) + xx + 1] * xalpha);
+ inv_i = w * xInc >> 16;
+ if( inv_i >= (srcWidth >> 1) - 1)
+ {
+ dst[h*(dstChrStride) + w] = src[h*(srcChrStride) + (srcWidth >> 1) -1]*128;
+ }
+
+ /* V plane: advance past the U plane. */
+ xpos = chrXInc * (w);
+ xx = xpos >> 16;
+ src += srcChrStride * srcHeight >> 1;
+ dst += (dstChrStride * chrHeight);
+ dst[h*(dstChrStride) + w] = (src[h * (srcChrStride) + xx]*(xalpha^127) + src[h * (srcChrStride) + xx + 1 ] * xalpha);
+
+ if( inv_i >= (srcWidth >> 1) - 1)
+ {
+ //v channel:
+ dst[h*(dstChrStride) + w] = src[h*(srcChrStride)+ (srcWidth >> 1) -1]*128;
+ }
+
+
+ }
+ );
+
+/* Stringified OpenCL kernel: full-filter vertical scale with an 8x8
+ * ordered-dither matrix.  Consumes the 15-bit intermediate produced by
+ * the horizontal pass and writes final 8-bit planar output.  Each
+ * work-item produces four luma quadrant samples plus one U and one V
+ * sample; the per-position dither offset is added (<<12) before the
+ * >>19 renormalisation and a branchless clamp to 0..255.
+ * NOTE(review): the chroma accumulators (val5/val6) index cfilter with
+ * filterPos1 = h * yfilterSize and loop yfilterSize times -- confirm the
+ * chroma filter table uses the same stride/size as the luma one. */
+char *kernel_src_vscalealldither = KERNEL(
+
+ kernel void vscale_all_dither_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global short *yfilter,
+ int yfilterSize,
+ const global short *cfilter,
+ int cfilterSize,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ /* Ordered-dither offsets centred around 64 (see swscale's
+ * dither_8x8_128 table). */
+ const unsigned char hb_dither_8x8_128[8][8] = {
+ { 36, 68, 60, 92, 34, 66, 58, 90, },
+ { 100, 4, 124, 28, 98, 2, 122, 26, },
+ { 52, 84, 44, 76, 50, 82, 42, 74, },
+ { 116, 20, 108, 12, 114, 18, 106, 10, },
+ { 32, 64, 56, 88, 38, 70, 62, 94, },
+ { 96, 0, 120, 24, 102, 6, 126, 30, },
+ { 48, 80, 40, 72, 54, 86, 46, 78, },
+ { 112, 16, 104, 8, 118, 22, 110, 14, },
+ };
+
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ /* Select the dither rows for the top and bottom output rows. */
+ local_up_dither = hb_dither_8x8_128[h & 7];
+ local_down_dither = hb_dither_8x8_128[(h + chrHeight) & 7];
+
+ //yscale;
+ int srcPos1 = (yfilterPos[h]) * srcStride + w;
+ int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);
+ int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;
+ int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;
+ int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);
+ int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;
+
+ /* Seed accumulators with the dither offset scaled to filter space. */
+ int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;
+ int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);
+ int val3 = (local_down_dither[w &7] << 12);
+ int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);
+ int val5 = (local_up_dither[w & 7] << 12);
+ int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chroma channel.
+
+ int j;
+ int filterPos1 = h * yfilterSize;
+ int filterPos2 = ( h + chrHeight ) * yfilterSize;
+ for(j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1] * yfilter[filterPos1 + j];
+ srcPos1 += srcStride;
+ val2 += src[srcPos2] * yfilter[filterPos1 + j];
+ srcPos2 += srcStride;
+ val3 += src[srcPos3] * yfilter[filterPos2 + j];
+ srcPos3 += srcStride;
+ val4 += src[srcPos4] * yfilter[filterPos2 + j];
+ srcPos4 += srcStride;
+ val5 += src[src1Pos] * cfilter[filterPos1 + j];
+ val6 += src[src2Pos] * cfilter[filterPos1 + j];
+ src1Pos += dstChrStride;
+ src2Pos += dstChrStride;
+ }
+ /* >>19 back to 8 bits, branchless clamp to 0..255. */
+ dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));
+ dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));
+ dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));
+ dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));
+
+ int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);
+ int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;
+ dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));
+ dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));
+ }
+ );
+
+/* Stringified OpenCL kernel: full-filter vertical scale without dither.
+ * Identical structure to vscale_all_dither_opencl, but the accumulator
+ * seed is a flat rounding bias of 64 (hb_sws_pb_64) instead of a
+ * position-dependent dither matrix.  Produces final 8-bit planar output
+ * via the same >>19 renormalisation and branchless 0..255 clamp.
+ * NOTE(review): same chroma-filter indexing caveat as the dither
+ * variant (cfilter addressed with h * yfilterSize); also a harmless
+ * stray double semicolon on the fourth luma store below. */
+char *kernel_src_vscaleallnodither = KERNEL(
+
+ kernel void vscale_all_nodither_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global short *yfilter,
+ int yfilterSize,
+ const global short *cfilter,
+ int cfilterSize,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ /* Constant rounding bias (half of the <<7 headroom). */
+ const unsigned char hb_sws_pb_64[8] = {
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_sws_pb_64;
+ local_down_dither = hb_sws_pb_64;
+
+
+ //yscale;
+ int srcPos1 = (yfilterPos[h]) * srcStride + w;
+ int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);
+ int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;
+ int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;
+ int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);
+ int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;
+
+ int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;
+ int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);
+ int val3 = (local_down_dither[w &7] << 12);
+ int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);
+ int val5 = (local_up_dither[w & 7] << 12);
+ int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chroma channel.
+
+
+ int j;
+ int filterPos1 = h * yfilterSize;
+ int filterPos2 = ( h + chrHeight ) * yfilterSize;
+ for(j = 0; j < yfilterSize; j++)
+ {
+ val1 += src[srcPos1] * yfilter[filterPos1 + j];
+ srcPos1 += srcStride;
+ val2 += src[srcPos2] * yfilter[filterPos1 + j];
+ srcPos2 += srcStride;
+ val3 += src[srcPos3] * yfilter[filterPos2 + j];
+ srcPos3 += srcStride;
+ val4 += src[srcPos4] * yfilter[filterPos2 + j];
+ srcPos4 += srcStride;
+ val5 += src[src1Pos] * cfilter[filterPos1 + j];
+ val6 += src[src2Pos] * cfilter[filterPos1 + j];
+ src1Pos += dstChrStride;
+ src2Pos += dstChrStride;
+ }
+ /* >>19 back to 8 bits, branchless clamp to 0..255. */
+ dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));
+ dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));
+ dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));
+ dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));;
+
+ int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);
+ int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;
+ dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));
+ dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));
+ }
+ );
+
+/* Stringified OpenCL kernel: "fast" vertical scale -- picks the nearest
+ * source row per yfilterPos/cfilterPos, adds a constant +64 rounding
+ * bias, shifts >>7 back to 8 bits, and applies the branchless 0..255
+ * clamp.  Handles four luma quadrant samples plus one U and one V
+ * sample per work-item.
+ * NOTE(review): all reads index the source with dstStride/dstChrStride,
+ * i.e. the intermediate buffer from the horizontal pass is assumed to be
+ * laid out with the destination strides -- confirm against host setup. */
+char *kernel_src_vscalefast = KERNEL(
+
+ kernel void vscale_fast_opencl(
+ global unsigned char *dst,
+ const global short *src,
+ const global int *yfilterPos,
+ const global int *cfilterPos,
+ int dstWidth,
+ int dstHeight,
+ int srcWidth,
+ int srcHeight,
+ int dstStride,
+ int dstChrStride,
+ int srcStride,
+ int srcChrStride)
+ {
+ /* Flat rounding bias (no dithering in the fast path). */
+ const unsigned char hb_sws_pb_64[8] = {
+ 64, 64, 64, 64, 64, 64, 64, 64
+ };
+
+ int w = get_global_id(0);
+ int h = get_global_id(1);
+
+ int chrWidth = get_global_size(0);
+ int chrHeight = get_global_size(1);
+
+ const unsigned char *local_up_dither;
+ const unsigned char *local_down_dither;
+
+ local_up_dither = hb_sws_pb_64;
+ local_down_dither = hb_sws_pb_64;
+
+
+ int rightpart = w + chrWidth;
+ int bh = h + chrHeight; // bottom part
+ /* Luma: nearest row + bias, >>7, clamp to 0..255. */
+ short val1 = (src[(yfilterPos[h]) * dstStride + w] + local_up_dither[(w + 0) & 7]) >> 7; //lum offset is 0;
+ short val2 = (src[(yfilterPos[h]) * dstStride + rightpart] + local_up_dither[rightpart & 7]) >> 7;
+ short val3 = (src[(yfilterPos[bh]) * dstStride + w] + local_down_dither[w & 7]) >> 7;
+ short val4 = (src[(yfilterPos[bh]) * dstStride + rightpart] + local_down_dither[rightpart & 7]) >> 7;
+ dst[h * dstStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+ dst[h * dstStride + rightpart] = ((val2&(~0xFF)) ? ((-val2) >> 31) : (val2));
+ dst[bh * dstStride + w] = ((val3&(~0xFF)) ? ((-val3) >> 31) : (val3));
+ dst[bh * dstStride + rightpart] = ((val4&(~0xFF)) ? ((-val4) >> 31) : (val4));
+
+ /* U plane: advance base pointers past the luma plane. */
+ src += dstStride * srcHeight;
+ dst += dstStride * dstHeight;
+ val1 = (src[cfilterPos[h] * (dstChrStride) + w] + local_up_dither[ w & 7]) >> 7;
+ dst[h * (dstChrStride) + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+
+ /* V plane: advance past the U plane. */
+ src += dstChrStride * (srcHeight >> 1);
+ dst += dstChrStride * chrHeight;
+ val1 = (src[cfilterPos[h] * dstChrStride + w] + local_up_dither[ (w + 3) & 7] ) >> 7;
+ dst[h * dstChrStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));
+
+ }
+ );
+
#endif
diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c index a10e9d64d..2371c2167 100644 --- a/libhb/openclwrapper.c +++ b/libhb/openclwrapper.c @@ -99,6 +99,7 @@ int hb_regist_opencl_kernel() ADD_KERNEL_CFG( 0, "frame_h_scale", NULL )
ADD_KERNEL_CFG( 1, "frame_v_scale", NULL )
ADD_KERNEL_CFG( 2, "nv12toyuv", NULL )
+ ADD_KERNEL_CFG( 3, "scale_opencl", NULL )
return 0;
}
@@ -331,7 +332,7 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] ))
{
- hb_log( "Notice: Unable to write opencl kernel, writing to tempory directory instead." );
+ hb_log( "Notice: Unable to write opencl kernel, writing to temporary directory instead." );
//printf( "opencl-wrapper: write binary[%s] failds\n", fileName);
return 0;
} //else
@@ -450,8 +451,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) if( status != CL_SUCCESS )
{
- hb_log( "Notice: No more platform vendor info.\n" );
- return(1);
+ continue;
}
gpu_info->platform = platforms[i];
@@ -470,8 +470,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) if( status != CL_SUCCESS )
{
- hb_log( "Notice: No available GPU device.\n" );
- return(1);
+ continue;
}
if( numDevices )
@@ -482,6 +481,12 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) }
if( NULL == gpu_info->platform )
{
+ hb_log( "Notice: No OpenCL-compatible GPU found.\n" );
+ return(1);
+ }
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "Notice: No OpenCL-compatible GPU found.\n" );
return(1);
}
@@ -659,14 +664,18 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, if( status == 0 )
return(0);
#else
- int kernel_src_size = strlen( kernel_src_hscale )+strlen( kernel_src_vscale )+strlen( kernel_src_nvtoyuv );
- source_str = (char*)malloc( kernel_src_size+2 );
+ int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast );
+ source_str = (char*)malloc( kernel_src_size + 2 );
strcpy( source_str, kernel_src_hscale );
strcat( source_str, kernel_src_vscale );
strcat( source_str, kernel_src_nvtoyuv );
+ strcat( source_str, kernel_src_hscaleall );
+ strcat( source_str, kernel_src_hscalefast );
+ strcat( source_str, kernel_src_vscalealldither );
+ strcat( source_str, kernel_src_vscaleallnodither );
+ strcat( source_str, kernel_src_vscalefast );
#endif
-
source = source_str;
source_size[0] = strlen( source );
@@ -678,7 +687,8 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, sizeof(numDevices),
&numDevices,
NULL );
- if( status != CL_SUCCESS ){
+ if( status != CL_SUCCESS )
+ {
hb_log( "Notice: Unable to get the number of devices in context.\n" );
return 0;
}
@@ -730,7 +740,6 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, gpu_info->programs[idx] = clCreateProgramWithSource(
gpu_info->context, 1, &source, source_size, &status );
}
-
if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
hb_log( "Notice: Unable to get list of devices in context.\n" );
return(0);
@@ -931,4 +940,31 @@ int hb_get_opencl_env() }
return status;
}
+
+
+/* Allocate an OpenCL buffer of `size` bytes on the global gpu_env
+ * context; `flags` is passed through as the cl_mem_flags argument.
+ * The handle is stored in *cl_Buf.  Returns 1 on success, 0 on failure
+ * (the OpenCL status code is printed).
+ * NOTE(review): errors go to printf; the rest of this file reports via
+ * hb_log -- consider unifying. */
+int hb_create_buffer(cl_mem *cl_Buf,int flags,int size)
+{
+ int status;
+ *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );
+
+ if( status != CL_SUCCESS )
+ {
+ printf("clCreateBuffer error '%d'\n\n",status);
+ return 0;
+ }
+ return 1;
+}
+
+/* Blocking read of `size` bytes from cl_inBuf into outbuf on the global
+ * command queue (CL_TRUE makes clEnqueueReadBuffer wait for completion).
+ * Returns 1 on success, 0 on failure.
+ * NOTE(review): the error message says "av_read_opencl_buffer" but this
+ * function is hb_read_opencl_buffer -- message text looks stale. */
+int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size)
+{
+ int status;
+
+ status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0);
+ if( status != CL_SUCCESS )
+ {
+ printf("av_read_opencl_buffer error '%d'\n",status);
+ return 0;
+ }
+ return 1;
+}
#endif
diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h index 933e7a3b3..c4d96f1e9 100644 --- a/libhb/openclwrapper.h +++ b/libhb/openclwrapper.h @@ -13,6 +13,7 @@ */
#ifndef __OPENCL_WRAPPER_H
#define __OPENCL_WRAPPER_H
+#ifdef USE_OPENCL
#include "common.h"
//support AMD opencl
@@ -21,24 +22,20 @@ typedef struct _KernelEnv
{
-#ifdef USE_OPENCL
cl_context context;
cl_command_queue command_queue;
cl_program program;
cl_kernel kernel;
-#endif
char kernel_name[150];
int isAMD;
}KernelEnv;
typedef struct _OpenCLEnv
{
-#ifdef USE_OPENCL
cl_platform_id platform;
cl_context context;
cl_device_id devices;
cl_command_queue command_queue;
-#endif
}OpenCLEnv;
@@ -76,4 +73,10 @@ int hb_create_kernel( char * kernelname, KernelEnv * env ); int hb_release_kernel( KernelEnv * env );
int hb_get_opencl_env();
+
+int hb_create_buffer(cl_mem *cl_Buf,int flags,int size);
+
+int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size);
+
+#endif
#endif
diff --git a/libhb/scale.c b/libhb/scale.c new file mode 100644 index 000000000..27adcba32 --- /dev/null +++ b/libhb/scale.c @@ -0,0 +1,1199 @@ +#ifdef USE_OPENCL
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hb.h"
+#include "scale.h"
+#include "scale_kernel.h"
+#include "libavutil/pixdesc.h"
+
+/* Pixel-format classification helpers for the OpenCL scaler, mirroring
+ * libswscale's isRGBinInt/isBGRinInt/isAnyRGB/isGray macros.  Each
+ * expands to a boolean expression over an AV_PIX_FMT_* value. */
+
+/* True for packed-RGB-in-integer formats (incl. mono black/white). */
+#define isScaleRGBinInt(x) \
+ ( \
+ (x)==AV_PIX_FMT_RGB48BE || \
+ (x)==AV_PIX_FMT_RGB48LE || \
+ (x)==AV_PIX_FMT_RGB32 || \
+ (x)==AV_PIX_FMT_RGB32_1 || \
+ (x)==AV_PIX_FMT_RGB24 || \
+ (x)==AV_PIX_FMT_RGB565BE || \
+ (x)==AV_PIX_FMT_RGB565LE || \
+ (x)==AV_PIX_FMT_RGB555BE || \
+ (x)==AV_PIX_FMT_RGB555LE || \
+ (x)==AV_PIX_FMT_RGB444BE || \
+ (x)==AV_PIX_FMT_RGB444LE || \
+ (x)==AV_PIX_FMT_RGB8 || \
+ (x)==AV_PIX_FMT_RGB4 || \
+ (x)==AV_PIX_FMT_RGB4_BYTE || \
+ (x)==AV_PIX_FMT_MONOBLACK || \
+ (x)==AV_PIX_FMT_MONOWHITE \
+ )
+/* True for packed-BGR-in-integer formats (incl. mono black/white). */
+#define isScaleBGRinInt(x) \
+ ( \
+ (x)==AV_PIX_FMT_BGR48BE || \
+ (x)==AV_PIX_FMT_BGR48LE || \
+ (x)==AV_PIX_FMT_BGR32 || \
+ (x)==AV_PIX_FMT_BGR32_1 || \
+ (x)==AV_PIX_FMT_BGR24 || \
+ (x)==AV_PIX_FMT_BGR565BE || \
+ (x)==AV_PIX_FMT_BGR565LE || \
+ (x)==AV_PIX_FMT_BGR555BE || \
+ (x)==AV_PIX_FMT_BGR555LE || \
+ (x)==AV_PIX_FMT_BGR444BE || \
+ (x)==AV_PIX_FMT_BGR444LE || \
+ (x)==AV_PIX_FMT_BGR8 || \
+ (x)==AV_PIX_FMT_BGR4 || \
+ (x)==AV_PIX_FMT_BGR4_BYTE|| \
+ (x)==AV_PIX_FMT_MONOBLACK|| \
+ (x)==AV_PIX_FMT_MONOWHITE \
+ )
+
+/* True for any RGB- or BGR-in-integer format. */
+#define isScaleAnyRGB(x) \
+ ( \
+ isScaleRGBinInt(x) || \
+ isScaleBGRinInt(x) \
+ )
+
+/* True for grayscale (luma-only) formats. */
+#define isScaleGray(x) \
+ ((x) == AV_PIX_FMT_GRAY8 || \
+ (x) == AV_PIX_FMT_Y400A || \
+ (x) == AV_PIX_FMT_GRAY16BE || \
+ (x) == AV_PIX_FMT_GRAY16LE)
+
+/* File-global scaler context. NOTE(review): a single static context
+ * means one concurrent scale at a time -- confirm callers serialize. */
+static ScaleContext *g_scale;
+
+/* Evaluate the cubic-spline coefficient polynomial
+ * a + b*dist + c*dist^2 + d*dist^3 for dist <= 1.0; for larger
+ * distances, recurse with the coefficients rebased onto the next
+ * spline segment (same scheme as libswscale's getSplineCoeff). */
+static double getScaleSplineCoeff(double a, double b, double c, double d,
+ double dist)
+{
+ if (dist <= 1.0)
+ return ((d * dist + c) * dist + b) * dist + a;
+ else
+ return getScaleSplineCoeff(0.0,
+ b + 2.0 * c + 3.0 * d,
+ c + 3.0 * d,
+ -b - 3.0 * c - 6.0 * d,
+ dist - 1.0);
+}
+
+static int initScaleFilter(int16_t **outFilter, int32_t **filterPos,
+ int *outFilterSize, int xInc, int srcW, int dstW,
+ int filterAlign, int one, int flags, int cpu_flags,
+ ScaleVector *srcFilter, ScaleVector *dstFilter,
+ double param[2])
+{
+ int i;
+ int filterSize;
+ int filter2Size;
+ int minFilterSize;
+ int64_t *filter = NULL;
+ int64_t *filter2 = NULL;
+ const int64_t fone = 1LL << 54;
+ int ret = -1;
+
+ *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos));
+ if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) {
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ if (FFABS(xInc - 0x10000) < 10) { // unscaled
+ int i;
+ filterSize = 1;
+ // FF_ALLOCZ_OR_GOTO(NULL, filter,dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize);
+ if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) {
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+
+ for (i = 0; i < dstW; i++) {
+ filter[i * filterSize] = fone;
+ (*filterPos)[i] = i;
+ }
+ } else if (flags & SWS_POINT) { // lame looking point sampling mode
+ int i;
+ int64_t xDstInSrc;
+ filterSize = 1;
+ //FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc / 2 - 0x8000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
+
+ (*filterPos)[i] = xx;
+ filter[i] = fone;
+ xDstInSrc += xInc;
+ }
+ } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
+ (flags & SWS_FAST_BILINEAR)) { // bilinear upscale
+ int i;
+ int64_t xDstInSrc;
+ filterSize = 2;
+ //FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc / 2 - 0x8000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
+ int j;
+
+ (*filterPos)[i] = xx;
+ // bilinear upscale / linear interpolate / area averaging
+ for (j = 0; j < filterSize; j++) {
+ int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
+ if (coeff < 0)
+ coeff = 0;
+ filter[i * filterSize + j] = coeff;
+ xx++;
+ }
+ xDstInSrc += xInc;
+ }
+ } else {
+ int64_t xDstInSrc;
+ int sizeFactor;
+
+ if (flags & SWS_BICUBIC)
+ sizeFactor = 4;
+ else if (flags & SWS_X)
+ sizeFactor = 8;
+ else if (flags & SWS_AREA)
+ sizeFactor = 1; // downscale only, for upscale it is bilinear
+ else if (flags & SWS_GAUSS)
+ sizeFactor = 8; // infinite ;)
+ else if (flags & SWS_LANCZOS)
+ sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
+ else if (flags & SWS_SINC)
+ sizeFactor = 20; // infinite ;)
+ else if (flags & SWS_SPLINE)
+ sizeFactor = 20; // infinite ;)
+ else if (flags & SWS_BILINEAR)
+ sizeFactor = 2;
+ else {
+ sizeFactor = 0; // GCC warning killer
+ assert(0);
+ }
+
+ if (xInc <= 1 << 16)
+ filterSize = 1 + sizeFactor; // upscale
+ else
+ filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
+
+
+ filterSize = FFMIN(filterSize, srcW - 2);
+ filterSize = FFMAX(filterSize, 1);
+
+ // FF_ALLOC_OR_GOTO(NULL, filter,
+ // dstW * sizeof(*filter) * filterSize, fail);
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){
+ printf("Cannot allocate memory.\n");
+ goto fail;
+ }
+
+ xDstInSrc = xInc - 0x10000;
+ for (i = 0; i < dstW; i++) {
+ int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
+ int j;
+ (*filterPos)[i] = xx;
+ for (j = 0; j < filterSize; j++) {
+ int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
+ double floatd;
+ int64_t coeff;
+
+ if (xInc > 1 << 16)
+ d = d * dstW / srcW;
+ floatd = d * (1.0 / (1 << 30));
+
+ if (flags & SWS_BICUBIC) {
+ int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
+ int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
+
+ if (d >= 1LL << 31) {
+ coeff = 0.0;
+ } else {
+ int64_t dd = (d * d) >> 30;
+ int64_t ddd = (dd * d) >> 30;
+
+ if (d < 1LL << 30)
+ coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
+ (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
+ (6 * (1 << 24) - 2 * B) * (1 << 30);
+ else
+ coeff = (-B - 6 * C) * ddd +
+ (6 * B + 30 * C) * dd +
+ (-12 * B - 48 * C) * d +
+ (8 * B + 24 * C) * (1 << 30);
+ }
+ coeff *= fone >> (30 + 24);
+ }
+#if 0
+ else if (flags & SWS_X) {
+ double p = param ? param * 0.01 : 0.3;
+ coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
+ coeff *= pow(2.0, -p * d * d);
+ }
+#endif
+ else if (flags & SWS_X) {
+ double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
+ double c;
+
+ if (floatd < 1.0)
+ c = cos(floatd * M_PI);
+ else
+ c = -1.0;
+ if (c < 0.0)
+ c = -pow(-c, A);
+ else
+ c = pow(c, A);
+ coeff = (c * 0.5 + 0.5) * fone;
+ } else if (flags & SWS_AREA) {
+ int64_t d2 = d - (1 << 29);
+ if (d2 * xInc < -(1LL << (29 + 16)))
+ coeff = 1.0 * (1LL << (30 + 16));
+ else if (d2 * xInc < (1LL << (29 + 16)))
+ coeff = -d2 * xInc + (1LL << (29 + 16));
+ else
+ coeff = 0.0;
+ coeff *= fone >> (30 + 16);
+ } else if (flags & SWS_GAUSS) {
+ double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+ coeff = (pow(2.0, -p * floatd * floatd)) * fone;
+ } else if (flags & SWS_SINC) {
+ coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
+ } else if (flags & SWS_LANCZOS) {
+ double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+ coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
+ (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
+ if (floatd > p)
+ coeff = 0;
+ } else if (flags & SWS_BILINEAR) {
+ coeff = (1 << 30) - d;
+ if (coeff < 0)
+ coeff = 0;
+ coeff *= fone >> 30;
+ } else if (flags & SWS_SPLINE) {
+ double p = -2.196152422706632;
+ coeff = getScaleSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
+ } else {
+ coeff = 0.0; // GCC warning killer
+ assert(0);
+ }
+
+ filter[i * filterSize + j] = coeff;
+ xx++;
+ }
+ xDstInSrc += 2 * xInc;
+ }
+ }
+
+ assert(filterSize > 0);
+ filter2Size = filterSize;
+ if (srcFilter)
+ filter2Size += srcFilter->length - 1;
+ if (dstFilter)
+ filter2Size += dstFilter->length - 1;
+ assert(filter2Size > 0);
+ //FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
+ filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2));
+ if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0)
+ {
+ printf("Can't alloc memory\n");
+ goto fail;
+ }
+
+ for (i = 0; i < dstW; i++) {
+ int j, k;
+
+ if (srcFilter) {
+ for (k = 0; k < srcFilter->length; k++) {
+ for (j = 0; j < filterSize; j++)
+ filter2[i * filter2Size + k + j] +=
+ srcFilter->coeff[k] * filter[i * filterSize + j];
+ }
+ } else {
+ for (j = 0; j < filterSize; j++)
+ filter2[i * filter2Size + j] = filter[i * filterSize + j];
+ }
+ // FIXME dstFilter
+
+ (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
+ }
+ av_freep(&filter);
+
+ // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
+ minFilterSize = 0;
+ for (i = dstW - 1; i >= 0; i--) {
+ int min = filter2Size;
+ int j;
+ int64_t cutOff = 0.0;
+
+ for (j = 0; j < filter2Size; j++) {
+ int k;
+ cutOff += FFABS(filter2[i * filter2Size]);
+
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
+ break;
+
+ if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
+ break;
+
+ // move filter coefficients left
+ for (k = 1; k < filter2Size; k++)
+ filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
+ filter2[i * filter2Size + k - 1] = 0;
+ (*filterPos)[i]++;
+ }
+
+ cutOff = 0;
+ for (j = filter2Size - 1; j > 0; j--) {
+ cutOff += FFABS(filter2[i * filter2Size + j]);
+
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
+ break;
+ min--;
+ }
+
+ if (min > minFilterSize)
+ minFilterSize = min;
+ }
+
+
+ assert(minFilterSize > 0);
+ filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
+ assert(filterSize > 0);
+ filter = (int64_t *)av_malloc(filterSize * dstW * sizeof(*filter));
+ if (filterSize >= MAX_FILTER_SIZE * 16 /
+ ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
+ goto fail;
+ *outFilterSize = filterSize;
+
+ if (flags & SWS_PRINT_INFO)
+ //av_log(NULL, AV_LOG_VERBOSE,
+ // "SwScaler: reducing / aligning filtersize %d -> %d\n",
+ // filter2Size, filterSize);
+ printf("SwScaler: reducing / aligning filtersize %d -> %d\n",filter2Size,filterSize);
+ for (i = 0; i < dstW; i++) {
+ int j;
+
+ for (j = 0; j < filterSize; j++) {
+ if (j >= filter2Size)
+ filter[i * filterSize + j] = 0;
+ else
+ filter[i * filterSize + j] = filter2[i * filter2Size + j];
+ if ((flags & SWS_BITEXACT) && j >= minFilterSize)
+ filter[i * filterSize + j] = 0;
+ }
+ }
+
+ // FIXME try to align filterPos if possible
+
+ // fix borders
+ for (i = 0; i < dstW; i++) {
+ int j;
+ if ((*filterPos)[i] < 0) {
+ // move filter coefficients left to compensate for filterPos
+ for (j = 1; j < filterSize; j++) {
+ int left = FFMAX(j + (*filterPos)[i], 0);
+ filter[i * filterSize + left] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
+ }
+ (*filterPos)[i]= 0;
+ }
+
+ if ((*filterPos)[i] + filterSize > srcW) {
+ int shift = (*filterPos)[i] + filterSize - srcW;
+ // move filter coefficients right to compensate for filterPos
+ for (j = filterSize - 2; j >= 0; j--) {
+ int right = FFMIN(j + shift, filterSize - 1);
+ filter[i * filterSize + right] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
+ }
+ (*filterPos)[i]= srcW - filterSize;
+ }
+ }
+
+ // Note the +1 is for the MMX scaler which reads over the end
+ // FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
+ // *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
+ *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t));
+ if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0)
+ {
+ printf("Can't alloc memory\n");
+ goto fail;
+ }
+
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int64_t error = 0;
+ int64_t sum = 0;
+
+ for (j = 0; j < filterSize; j++) {
+ sum += filter[i * filterSize + j];
+ }
+ sum = (sum + one / 2) / one;
+ for (j = 0; j < *outFilterSize; j++) {
+ int64_t v = filter[i * filterSize + j] + error;
+ int intV = ROUNDED_DIV(v, sum);
+ (*outFilter)[i * (*outFilterSize) + j] = intV;
+ error = v - intV * sum;
+ }
+ }
+
+ (*filterPos)[dstW + 0] =
+ (*filterPos)[dstW + 1] =
+ (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
+ for (i = 0; i < *outFilterSize; i++) {
+ int k = (dstW - 1) * (*outFilterSize) + i;
+ (*outFilter)[k + 1 * (*outFilterSize)] =
+ (*outFilter)[k + 2 * (*outFilterSize)] =
+ (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
+ }
+
+ ret = 0;
+
+fail:
+ av_free(filter);
+ av_free(filter2);
+ return ret;
+}
+
+static int handle_scale_jpeg(enum PixelFormat *format)
+{
+ switch (*format) {
+ case AV_PIX_FMT_YUVJ420P:
+ *format = AV_PIX_FMT_YUV420P;
+ return 1;
+ case AV_PIX_FMT_YUVJ422P:
+ *format = AV_PIX_FMT_YUV422P;
+ return 1;
+ case AV_PIX_FMT_YUVJ444P:
+ *format = AV_PIX_FMT_YUV444P;
+ return 1;
+ case AV_PIX_FMT_YUVJ440P:
+ *format = AV_PIX_FMT_YUV440P;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void scaleGetSubSampleFactors(int *h, int *v, enum PixelFormat format)
+{
+ *h = av_pix_fmt_descriptors[format].log2_chroma_w;
+ *v = av_pix_fmt_descriptors[format].log2_chroma_h;
+}
+
/* Per-pixel-format capability flags: whether the scaler accepts the
 * format as input and/or can produce it as output. */
typedef struct FormatEntry {
    int is_supported_in, is_supported_out;
} FormatEntry;
+
/* Support table indexed directly by enum PixelFormat value.  Entries
 * are positional, so this list must stay in sync with libavutil's
 * AV_PIX_FMT_* enumeration order for the ffmpeg version this was
 * written against.  NOTE(review): fragile — designated initializers
 * ([AV_PIX_FMT_XXX] = { 1, 1 }) would make the mapping explicit and
 * survive enum reordering; confirm intended ffmpeg version before
 * changing. */
static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 0, 0 },
    { 1, 1 },
    { 0, 1 },
    { 1, 1 },
    { 1, 1 },
    { 0, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 0 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 1 },
    { 1, 1 },
    { 0, 0 },
    { 0, 0 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 1 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
    { 1, 0 },
};
+
+int scale_isSupportedInput(enum PixelFormat pix_fmt)
+{
+ return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
+ format_entries[pix_fmt].is_supported_in : 0;
+}
+
+int scale_isSupportedOutput(enum PixelFormat pix_fmt)
+{
+ return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
+ format_entries[pix_fmt].is_supported_out : 0;
+}
+
+static void hcscale_fast_c(ScaleContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
+ dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
+}
+
+static void hyscale_fast_c(ScaleContext *c, int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
+}
+
/*
 * Generic horizontal scale: 16-bit input samples, 19-bit output stored
 * in int32_t (the int16_t* dst parameter is reinterpreted).  One FIR
 * dot product per destination pixel.
 */
static void hScale16To19_c(ScaleContext *c, int16_t *_dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh = bits - 4;

    /* RGB/PAL8 sources get a fixed shift instead of a depth-derived one.
     * NOTE(review): assumes the pre-expansion convention of the matching
     * input functions — confirm against ff_sws_init_input_funcs. */
    if((isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
        sh= 9;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        /* clip the accumulated value to the 19-bit range */
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}
+
/*
 * Generic horizontal scale: 16-bit input samples, 15-bit int16_t
 * output.  One FIR dot product per destination pixel.
 */
static void hScale16To15_c(ScaleContext *c, int16_t *dst, int dstW,
                           const uint8_t *_src, const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    /* Sub-16-bit sources: RGB/PAL8 use a fixed shift, everything else a
     * depth-derived one.  NOTE(review): assumes the pre-expansion
     * convention of the matching input functions — confirm against
     * ff_sws_init_input_funcs. */
    if(sh<15)
        sh= isScaleAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}
+
+static void hScale8To15_c(ScaleContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+ for (j = 0; j < filterSize; j++) {
+ val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
+ }
+}
+
+static void hScale8To19_c(ScaleContext *c, int16_t *_dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize)
+{
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
+ int val = 0;
+ for (j = 0; j < filterSize; j++) {
+ val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+ }
+ dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
+ }
+}
+
/* Convert 15-bit fixed-point chroma rows to full (JPEG) range in
 * place.  Inputs are clamped so the int16_t result cannot overflow. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int u = dstU[n];
        int v = dstV[n];

        if (u > 30775)
            u = 30775;
        if (v > 30775)
            v = 30775;
        dstU[n] = (u * 4663 - 9289992) >> 12; /* -264 */
        dstV[n] = (v * 4663 - 9289992) >> 12; /* -264 */
    }
}
+
/* Convert 15-bit fixed-point chroma rows from full (JPEG) range back
 * to limited range, in place: y = (x*1799 + 4081085) >> 11. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    const int16_t *end = dstU + width;

    while (u < end) {
        *u = (*u * 1799 + 4081085) >> 11; /* 1469 */
        *v = (*v * 1799 + 4081085) >> 11; /* 1469 */
        u++;
        v++;
    }
}
+
/* Convert a 15-bit fixed-point luma row to full (JPEG) range in place.
 * The input is clamped so the int16_t result cannot overflow. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int y = dst[n] < 30189 ? dst[n] : 30189;

        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
+
/* Convert a 15-bit fixed-point luma row from full (JPEG) range back to
 * limited range, in place: y = (x*14071 + 33561947) >> 14. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    const int16_t *end = dst + width;

    for (; p < end; p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
+
/*
 * Convert >10-bit chroma rows (19-bit samples held in int32_t storage,
 * hence the pointer reinterpretation) to full (JPEG) range, in place.
 *
 * Fix: the clamped product could exceed INT_MAX (30775<<4 == 492400 and
 * 492400 * 4663 > 2^31), which is signed-overflow undefined behavior.
 * The multiply is now performed in unsigned arithmetic (well-defined
 * modulo 2^32) and converted back to int before the arithmetic shift,
 * matching the equivalent fix in upstream libswscale.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);

        dstU[i] = (int)(u * 4663U - (9289992 << 4)) >> 12; /* -264 */
        dstV[i] = (int)(v * 4663U - (9289992 << 4)) >> 12; /* -264 */
    }
}
+
/* 19-bit variant of chrRangeFromJpeg_c (samples in int32_t storage):
 * the additive constant is pre-scaled by 16. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        u[n] = (u[n] * 1799 + (4081085 << 4)) >> 11; /* 1469 */
        v[n] = (v[n] * 1799 + (4081085 << 4)) >> 11; /* 1469 */
    }
}
+
/*
 * Convert a >10-bit luma row (19-bit samples held in int32_t storage)
 * to full (JPEG) range, in place.
 *
 * Fix: the clamped product could exceed INT_MAX (30189<<4 == 483024 and
 * 483024 * 4769 > 2^31), which is signed-overflow undefined behavior.
 * The multiply is now performed in unsigned arithmetic (well-defined
 * modulo 2^32) and converted back to int before the arithmetic shift,
 * matching the equivalent fix in upstream libswscale.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        int y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);

        dst[i] = (int)(y * 4769U - (39057361 << 2)) >> 12;
    }
}
+
/* 19-bit variant of lumRangeFromJpeg_c (samples in int32_t storage).
 * Coefficients are the original 14-bit-shift constants divided by 4 so
 * a 12-bit shift can be used: (14071/4) == 3517, (33561947<<4)/4. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * (14071 / 4) + (33561947 << 4) / 4) >> 12;
}
+
/*
 * Select the C scaling/conversion kernels for this context based on
 * source/destination bit depth and sample range.
 */
static av_cold void sws_init_swScale_c(ScaleContext *c)
{
    enum PixelFormat srcFormat = c->srcFormat;

    /* output (vertical) and input (unscaled-conversion) kernels come
     * from the shared init helpers */
    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX);

    ff_sws_init_input_funcs(c);

    /* Horizontal scalers: pick the 8- or 16-bit input flavour; the fast
     * bilinear path is only installed for 8-bit in / <=10-bit out and
     * only when SWS_FAST_BILINEAR was requested. */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            }
        } else {
            c->hyScale = c->hcScale = hScale8To19_c;
        }
    } else {
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
                                                 : hScale16To15_c;
    }

    /* Range conversion kernels (limited <-> full), skipped when the
     * destination is RGB; the *16 variants serve >10-bit pipelines. */
    if (c->srcRange != c->dstRange && !isScaleAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg_c;
                c->chrConvertRange = chrRangeFromJpeg_c;
            } else {
                c->lumConvertRange = lumRangeToJpeg_c;
                c->chrConvertRange = chrRangeToJpeg_c;
            }
        } else {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg16_c;
                c->chrConvertRange = chrRangeFromJpeg16_c;
            } else {
                c->lumConvertRange = lumRangeToJpeg16_c;
                c->chrConvertRange = chrRangeToJpeg16_c;
            }
        }
    }

    /* Gray and 1-bit mono formats skip horizontal chroma scaling. */
    if (!(isScaleGray(srcFormat) || isScaleGray(c->dstFormat) ||
          srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
}
+
/*
 * Derive all scaling parameters (fixed-point increments, chroma plane
 * geometry, and the four FIR filters) for an already-populated
 * ScaleContext.  Returns 0 on success, -1 on failure; on failure the
 * caller is expected to release the context (including any filter
 * arrays that were allocated before the failing step) via
 * sws_freeContext().
 *
 * Large regions below are kept verbatim from the original libswscale
 * initialization but disabled for the OpenCL path.
 */
int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dstFilter)
{
// int i, j;
    ScaleFilter dummyFilter = { NULL, NULL, NULL, NULL };
    int srcW = c->srcW;
    int srcH = c->srcH;
    int dstW = c->dstW;
    int dstH = c->dstH;
// int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
    int flags, cpu_flags;
    enum PixelFormat srcFormat = c->srcFormat;
    enum PixelFormat dstFormat = c->dstFormat;

    cpu_flags = 0;
    flags = c->flags;

    /* NOTE(review): srcFormat/dstFormat were just copied out of c, so
     * this condition can never be true here; kept from the original
     * code where the formats could have been rewritten in between. */
    if(srcFormat != c->srcFormat || dstFormat != c->dstFormat){
        printf("deprecated pixel format used, make sure you did set range correctly\n");
        c->srcFormat = srcFormat;
        c->dstFormat = dstFormat;
    }

/*
    if (!scale_isSupportedInput(srcFormat)) {
        printf("%s is not supported as input pixel format\n",av_get_pix_fmt_name(srcFormat));
        return -1;
    }

    if (!scale_isSupportedOutput(dstFormat)) {
        printf("%s is not supported as output pixel format\n",av_get_pix_fmt_name(dstFormat));
        return -1;
    }
    i = flags & (SWS_POINT |
                 SWS_AREA |
                 SWS_BILINEAR |
                 SWS_FAST_BILINEAR |
                 SWS_BICUBIC |
                 SWS_X |
                 SWS_GAUSS |
                 SWS_LANCZOS |
                 SWS_SINC |
                 SWS_SPLINE |
                 SWS_BICUBLIN);
    if (!i || (i & (i - 1))) {
        printf("Exactly one scaler algorithm must be chosen, got %X\n",i);
        return -1;
    }
*/

    /* Reject dimensions the filter generator cannot handle. */
    if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {
        printf("%dx%d -> %dx%d is invalid scaling dimension\n",srcW,srcH,dstW,dstH);
        return -1;
    }

    if (!dstFilter)
        dstFilter = &dummyFilter;
    if (!srcFilter)
        srcFilter = &dummyFilter;

    /* 16.16 fixed-point source step per destination pixel, rounded. */
    c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
    c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
    c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[dstFormat]);
    c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[srcFormat]);
    c->vRounder = 4 * 0x0001000100010001ULL;

    scaleGetSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
    scaleGetSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

/*
    if (isScaleAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
        if (dstW&1) {
            printf("Forcing full internal H chroma due to odd output size\n");
            flags |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
        }
    }

    if (flags & SWS_FULL_CHR_H_INT &&
        isScaleAnyRGB(dstFormat) &&
        dstFormat != AV_PIX_FMT_RGBA &&
        dstFormat != AV_PIX_FMT_ARGB &&
        dstFormat != AV_PIX_FMT_BGRA &&
        dstFormat != AV_PIX_FMT_ABGR &&
        dstFormat != AV_PIX_FMT_RGB24 &&
        dstFormat != AV_PIX_FMT_BGR24) {
        printf("full chroma interpolation for destination format '%s' not yet implemented\n", av_get_pix_fmt_name(dstFormat));
        flags &= ~SWS_FULL_CHR_H_INT;
        c->flags = flags;
    }

    if (isScaleAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
        c->chrDstHSubSample = 1;
*/

    // drop some chroma lines if the user wants it
    c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> SWS_SRC_V_CHR_DROP_SHIFT;
    c->chrSrcVSubSample += c->vChrDrop;

/*
    if (isScaleAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) &&
        srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 &&
        srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 &&
        srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE &&
        ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || (flags & SWS_FAST_BILINEAR)))
        c->chrSrcHSubSample = 1;
*/

    /* Chroma plane geometry; -((-x) >> s) is a ceiling shift. */
    c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample);
    c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample);
    c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
    c->chrDstH = -((-dstH) >> c->chrDstVSubSample);

/*
    c->srcBpc = 1 + av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1;
    if (c->srcBpc < 8)
        c->srcBpc = 8;
    c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1;
    if (c->dstBpc < 8)
        c->dstBpc = 8;
    if (isScaleAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
        c->srcBpc = 16;
    if (c->dstBpc == 16)
        dst_stride <<= 1;
    c->formatConvBuffer = (uint8_t *)av_mallocz(FFALIGN(srcW*2+78, 16) * 2);
    if( c->formatConvBuffer == NULL && (FFALIGN(srcW*2+78, 16) * 2) != 0)
    {
        printf("Can't alloc memory formatConvBuffer\n");
        goto fail;
    }
*/

    c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
    c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;

/*
    if (flags & SWS_FAST_BILINEAR) {
        if (c->canMMX2BeUsed) {
            c->lumXInc += 20;
            c->chrXInc += 20;
        }
    }
*/

    /* No SIMD coefficient packing on this path, so no alignment need. */
    const int filterAlign = 1;

    /* Build the four FIR filters: horizontal/vertical x luma/chroma.
     * SWS_BICUBLIN means bicubic luma + bilinear chroma. */
    if (initScaleFilter(&c->hLumFilter, &c->hLumFilterPos,
                        &c->hLumFilterSize, c->lumXInc,
                        srcW, dstW, filterAlign, 1 << 14,
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                        cpu_flags, srcFilter->lumH, dstFilter->lumH,
                        c->param) < 0)
        goto fail;

    if (initScaleFilter(&c->hChrFilter, &c->hChrFilterPos,
                        &c->hChrFilterSize, c->chrXInc,
                        c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                        cpu_flags, srcFilter->chrH, dstFilter->chrH,
                        c->param) < 0)
        goto fail;

    if (initScaleFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
                        c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                        cpu_flags, srcFilter->lumV, dstFilter->lumV,
                        c->param) < 0)
        goto fail;

    if (initScaleFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
                        c->chrYInc, c->chrSrcH, c->chrDstH,
                        filterAlign, (1 << 12),
                        (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                        cpu_flags, srcFilter->chrV, dstFilter->chrV,
                        c->param) < 0)
        goto fail;

/*
#if HAVE_ALTIVEC
    FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail);
    FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);

    for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
        int j;
        short *p = (short *)&c->vYCoeffsBank[i];
        for (j = 0; j < 8; j++)
            p[j] = c->vLumFilter[i];
    }

    for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
        int j;
        short *p = (short *)&c->vCCoeffsBank[i];
        for (j = 0; j < 8; j++)
            p[j] = c->vChrFilter[i];
    }
#endif
*/

/*
    // calculate buffer sizes so that they won't run out while handling these damn slices
    c->vLumBufSize = c->vLumFilterSize;
    c->vChrBufSize = c->vChrFilterSize;
    for (i = 0; i < dstH; i++) {
        int chrI = (int64_t)i * c->chrDstH / dstH;
        int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
                              ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
                               << c->chrSrcVSubSample));

        nextSlice >>= c->chrSrcVSubSample;
        nextSlice <<= c->chrSrcVSubSample;
        if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
            c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
        if (c->vChrFilterPos[chrI] + c->vChrBufSize <
            (nextSlice >> c->chrSrcVSubSample))
            c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
                             c->vChrFilterPos[chrI];
    }

    c->lumPixBuf = (int16_t **)av_malloc( c->vLumBufSize * 3 * sizeof(int16_t *));
    if(c->lumPixBuf == NULL && ( c->vLumBufSize * 3 * sizeof(int16_t *)) != 0)
    {
        printf("Can't alloc memory lumPixbuf\n");
        goto fail;
    }

    c->chrUPixBuf = (int16_t **)av_malloc( c->vChrBufSize * 3 * sizeof(int16_t *));
    if(c->chrUPixBuf == NULL && ( c->vChrBufSize * 3 * sizeof(int16_t *)) != 0)
    {
        printf("Can't alloc memory chrUpixbuf\n");
        goto fail;
    }

    c->chrVPixBuf = (int16_t **)av_malloc( c->vChrBufSize * 3 * sizeof(int16_t *));
    if(c->chrVPixBuf == NULL && ( c->vChrBufSize * 3 * sizeof(int16_t *)) != 0)
    {
        printf("Can't alloc memory chrVPixBuf\n");
        goto fail;
    }

    for (i = 0; i < c->vLumBufSize; i++) {
        c->lumPixBuf[i + c->vLumBufSize] = (int16_t *)malloc( dst_stride + 16);
        if(c->lumPixBuf[i + c->vLumBufSize] == NULL && ( dst_stride + 16) != 0)
        {
            printf("Can't alloc memory lumPixBuf[]\n");
            goto fail;
        }
        c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
    }

    c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
    c->uv_offx2 = dst_stride + 16;
    for (i = 0; i < c->vChrBufSize; i++) {
        c->chrUPixBuf[i + c->vChrBufSize] = (int16_t *)av_malloc( dst_stride * 2 + 32);
        if(c->chrUPixBuf[i + c->vChrBufSize] == NULL && ( dst_stride * 2 + 32) != 0)
        {
            printf("Can't alloc memory chrUPixBuf[]\n");
            goto fail;
        }
        c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
        c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
                         = c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
    }

    // try to avoid drawing green stuff between the right end and the stride end
    for (i = 0; i < c->vChrBufSize; i++)
        if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
            assert(c->dstBpc > 10);
            for(j=0; j<dst_stride/2+1; j++)
                ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
        } else
            for(j=0; j<dst_stride+1; j++)
                ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;

    assert(c->chrDstH <= dstH);
*/
    if (flags & SWS_PRINT_INFO) {
        /* NOTE(review): placeholder left behind when the verbose report
         * was removed; consider a real message or dropping the branch. */
        printf("I have delete it, Haha\n");
    }

// sws_init_swScale_c(c);
    return 0;
fail:
    /* Filters allocated above are released by the caller. */
    return -1;
}
+
+ScaleContext *scale_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
+ int dstW, int dstH, enum PixelFormat dstFormat,
+ int flags, ScaleFilter *srcFilter,
+ ScaleFilter *dstFilter, const double *param)
+{
+ ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext));
+ sc->flags = flags;
+ sc->srcW = srcW;
+ sc->srcH = srcH;
+ sc->dstW = dstW;
+ sc->dstH = dstH;
+ sc->srcRange = handle_scale_jpeg(&srcFormat);
+ sc->dstRange = handle_scale_jpeg(&dstFormat);
+ sc->srcFormat = srcFormat;
+ sc->dstFormat = dstFormat;
+ sc->hyscale_fast = 0;
+ sc->hcscale_fast = 0;
+
+ if (param) {
+ sc->param[0] = param[0];
+ sc->param[1] = param[1];
+ }
+
+ if (scale_init_context(sc, srcFilter, dstFilter) < 0) {
+ sws_freeContext(sc);
+ return NULL;
+ }
+
+ return sc;
+}
+
+int scale_opencl(ScaleContext *c,
+ void *cl_inbuf,
+ void *cl_outbuf,
+ int *srcStride,
+ int *dstStride)
+{
+ int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
+
+ av_scale_frame(c,cl_outbuf,cl_inbuf,srcStride,dstStride,&should_dither);
+
+ return 1;
+}
+
+void scale_init( int width, int height, int dstwidth, int dstheight )
+{
+ int srcW = width;
+ int srcH = height;
+ int dstW = dstwidth;
+ int dstH = dstheight;
+ enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P;
+ enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P;
+ int flags = SWS_BILINEAR;
+
+ g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL);
+}
+
+void scale_release()
+{
+ sws_freeContext( g_scale );
+}
#ifdef USE_OPENCL
/* Scale one frame already resident in OpenCL buffers using the
 * module-global context.  Returns the scale_opencl() result. */
int scale_run( cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height )
{
    int src_stride[4];
    int dst_stride[4];

    g_scale->cl_src = inbuf;
    g_scale->cl_dst = outbuf;

    src_stride[0] = linesizey;
    src_stride[1] = src_stride[2] = linesizeuv;
    src_stride[3] = 0;

    dst_stride[0] = g_scale->dstW;
    dst_stride[1] = dst_stride[2] = g_scale->chrDstW;
    dst_stride[3] = 0;

    return scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride );
}
#endif
+#endif
diff --git a/libhb/scale.h b/libhb/scale.h new file mode 100644 index 000000000..a28ccdf38 --- /dev/null +++ b/libhb/scale.h @@ -0,0 +1,310 @@ +#ifndef SCALE_H
+#define SCALE_H
+#ifdef USE_OPENCL
+#include <stdint.h>
+#include "vadxva2.h"
+#include "libavutil/pixfmt.h"
+#include "hbffmpeg.h"
+
+#define YUVRGB_TABLE_HEADROOM 128
+#define MAX_FILTER_SIZE 256
+#define is16BPS(x) \
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15)
+
+#define is9_OR_10BPS(x) \
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \
+ av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9)
+
+#if ARCH_X86_64
+# define APCK_PTR2 8
+# define APCK_COEF 16
+# define APCK_SIZE 24
+#else
+# define APCK_PTR2 4
+# define APCK_COEF 8
+# define APCK_SIZE 16
+#endif
+
/*
 * Kernel function-pointer types stored in ScaleContext.
 *
 * Fix: yuv2packed2_fn and yuv2packedX_fn misspelled the context tag as
 * "struct SCaleContext", which silently declared a *different*
 * incomplete struct type and made those pointer types incompatible with
 * the real ScaleContext used everywhere else.
 */

/* Write one output line from a single vertically-filtered source line
 * (planar output). */
typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
                               const uint8_t *dither, int offset);

/* Write one output line from a vertical filter over several source
 * lines (planar output). */
typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
                               const int16_t **src, uint8_t *dest, int dstW,
                               const uint8_t *dither, int offset);

/* Write one interleaved chroma output line (e.g. NV12-style). */
typedef void (*yuv2interleavedX_fn)(struct ScaleContext *c,
                                    const int16_t *chrFilter,
                                    int chrFilterSize,
                                    const int16_t **chrUSrc,
                                    const int16_t **chrVSrc,
                                    uint8_t *dest, int dstW);

/* Packed output from one source line per plane. */
typedef void (*yuv2packed1_fn)(struct ScaleContext *c, const int16_t *lumSrc,
                               const int16_t *chrUSrc[2],
                               const int16_t *chrVSrc[2],
                               const int16_t *alpSrc, uint8_t *dest,
                               int dstW, int uvalpha, int y);

/* Packed output blended from two source lines per plane. */
typedef void (*yuv2packed2_fn)(struct ScaleContext *c, const int16_t *lumSrc[2],
                               const int16_t *chrUSrc[2],
                               const int16_t *chrVSrc[2],
                               const int16_t *alpSrc[2],
                               uint8_t *dest,
                               int dstW, int yalpha, int uvalpha, int y);

/* Packed output from a full vertical filter over several source lines. */
typedef void (*yuv2packedX_fn)(struct ScaleContext *c, const int16_t *lumFilter,
                               const int16_t **lumSrc, int lumFilterSize,
                               const int16_t *chrFilter,
                               const int16_t **chrUSrc,
                               const int16_t **chrVSrc, int chrFilterSize,
                               const int16_t **alpSrc, uint8_t *dest,
                               int dstW, int y);

/* Top-level slice-scaling entry point. */
typedef int (*SwsFunc)(struct ScaleContext *context, const uint8_t *src[],
                       int srcStride[], int srcSliceY, int srcSliceH,
                       uint8_t *dst[], int dstStride[]);
+
/* A 1-D convolution filter expressed as a coefficient vector. */
typedef struct {
    double *coeff; ///< pointer to the list of coefficients
    int length; ///< number of coefficients in the vector
} ScaleVector;
+
/* Optional user-supplied pre-filters: separate horizontal/vertical
 * vectors for the luma and chroma planes (any member may be NULL). */
typedef struct {
    ScaleVector *lumH; ///< horizontal luma filter
    ScaleVector *lumV; ///< vertical luma filter
    ScaleVector *chrH; ///< horizontal chroma filter
    ScaleVector *chrV; ///< vertical chroma filter
} ScaleFilter;
+
+typedef struct ScaleContext {
+ SwsFunc swScale;
+ int srcW; ///< Width of source luma/alpha planes.
+ int srcH; ///< Height of source luma/alpha planes.
+ int dstH; ///< Height of destination luma/alpha planes.
+ int chrSrcW; ///< Width of source chroma planes.
+ int chrSrcH; ///< Height of source chroma planes.
+ int chrDstW; ///< Width of destination chroma planes.
+ int chrDstH; ///< Height of destination chroma planes.
+ int lumXInc, chrXInc;
+ int lumYInc, chrYInc;
+ enum PixelFormat dstFormat; ///< Destination pixel format.
+ enum PixelFormat srcFormat; ///< Source pixel format.
+ int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
+ int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
+ int dstBpc, srcBpc;
+ int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
+ int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
+ int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
+ int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
+ int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
+ int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
+ double param[2]; ///< Input parameters for scaling algorithms that need them.
+
+ uint32_t pal_yuv[256];
+ uint32_t pal_rgb[256];
+
+ int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
+ int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+ int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+ int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
+ int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
+ int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
+ int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
+ int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
+ int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
+ int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
+
+ uint8_t *formatConvBuffer;
+ int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
+ int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
+ int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
+ int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
+ int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
+ int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
+ int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
+ int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
+ int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
+ int vChrFilterSize; ///< Vertical filter size for chroma pixels.
+
+ int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
+ int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
+ uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
+ uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
+
+ int canMMX2BeUsed;
+
+ unsigned char *dest;
+ unsigned char *source;
+
+ int dstY; ///< Last destination vertical line output from last slice.
+ int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
+ void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
+ uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+
+ //Colorspace stuff
+ int contrast, brightness, saturation; // for sws_getColorspaceDetails
+ int srcColorspaceTable[4];
+ int dstColorspaceTable[4];
+ int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
+ int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
+ int src0Alpha;
+ int dst0Alpha;
+ int yuv2rgb_y_offset;
+ int yuv2rgb_y_coeff;
+ int yuv2rgb_v2r_coeff;
+ int yuv2rgb_v2g_coeff;
+ int yuv2rgb_u2g_coeff;
+ int yuv2rgb_u2b_coeff;
+
+#define RED_DITHER "0*8"
+#define GREEN_DITHER "1*8"
+#define BLUE_DITHER "2*8"
+#define Y_COEFF "3*8"
+#define VR_COEFF "4*8"
+#define UB_COEFF "5*8"
+#define VG_COEFF "6*8"
+#define UG_COEFF "7*8"
+#define Y_OFFSET "8*8"
+#define U_OFFSET "9*8"
+#define V_OFFSET "10*8"
+#define LUM_MMX_FILTER_OFFSET "11*8"
+#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
+#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
+#define ESP_OFFSET "11*8+4*4*256*2+8"
+#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
+#define U_TEMP "11*8+4*4*256*2+24"
+#define V_TEMP "11*8+4*4*256*2+32"
+#define Y_TEMP "11*8+4*4*256*2+40"
+#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
+#define UV_OFF_PX "11*8+4*4*256*3+48"
+#define UV_OFF_BYTE "11*8+4*4*256*3+56"
+#define DITHER16 "11*8+4*4*256*3+64"
+#define DITHER32 "11*8+4*4*256*3+80"
+
+ DECLARE_ALIGNED(8, uint64_t, redDither);
+ DECLARE_ALIGNED(8, uint64_t, greenDither);
+ DECLARE_ALIGNED(8, uint64_t, blueDither);
+
+ DECLARE_ALIGNED(8, uint64_t, yCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vrCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ubCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vgCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ugCoeff);
+ DECLARE_ALIGNED(8, uint64_t, yOffset);
+ DECLARE_ALIGNED(8, uint64_t, uOffset);
+ DECLARE_ALIGNED(8, uint64_t, vOffset);
+ int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
+ int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
+ int dstW; ///< Width of destination luma/alpha planes.
+ DECLARE_ALIGNED(8, uint64_t, esp);
+ DECLARE_ALIGNED(8, uint64_t, vRounder);
+ DECLARE_ALIGNED(8, uint64_t, u_temp);
+ DECLARE_ALIGNED(8, uint64_t, v_temp);
+ DECLARE_ALIGNED(8, uint64_t, y_temp);
+ int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
+
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
+ DECLARE_ALIGNED(8, uint16_t, dither16)[8];
+ DECLARE_ALIGNED(8, uint32_t, dither32)[8];
+
+ const uint8_t *chrDither8, *lumDither8;
+
+#if HAVE_ALTIVEC
+ vector signed short CY;
+ vector signed short CRV;
+ vector signed short CBU;
+ vector signed short CGU;
+ vector signed short CGV;
+ vector signed short OY;
+ vector unsigned short CSHIFT;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
+#endif
+
+#if ARCH_BFIN
+ DECLARE_ALIGNED(4, uint32_t, oy);
+ DECLARE_ALIGNED(4, uint32_t, oc);
+ DECLARE_ALIGNED(4, uint32_t, zero);
+ DECLARE_ALIGNED(4, uint32_t, cy);
+ DECLARE_ALIGNED(4, uint32_t, crv);
+ DECLARE_ALIGNED(4, uint32_t, rmask);
+ DECLARE_ALIGNED(4, uint32_t, cbu);
+ DECLARE_ALIGNED(4, uint32_t, bmask);
+ DECLARE_ALIGNED(4, uint32_t, cgu);
+ DECLARE_ALIGNED(4, uint32_t, cgv);
+ DECLARE_ALIGNED(4, uint32_t, gmask);
+#endif
+
+#if HAVE_VIS
+ DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
+#endif
+ int use_mmx_vfilter;
+
+ /* function pointers for swScale() */
+ yuv2planar1_fn yuv2plane1;
+ yuv2planarX_fn yuv2planeX;
+ yuv2interleavedX_fn yuv2nv12cX;
+ yuv2packed1_fn yuv2packed1;
+ yuv2packed2_fn yuv2packed2;
+ yuv2packedX_fn yuv2packedX;
+
+ /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
+ void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+ /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+ void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+ /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+ void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+ int width, uint32_t *pal);
+
+ void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
+ void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
+ int width);
+
+ void (*hyscale_fast)(struct SwsContext *c,
+ int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc);
+ void (*hcscale_fast)(struct SwsContext *c,
+ int16_t *dst1, int16_t *dst2, int dstWidth,
+ const uint8_t *src1, const uint8_t *src2,
+ int srcW, int xInc);
+
+ void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+ void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+
+ void (*lumConvertRange)(int16_t *dst, int width);
+ void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
+
+ int needs_hcscale; ///< Set if there are chroma planes to be converted.
+
+ cl_mem cl_hLumFilter;
+ cl_mem cl_hLumFilterPos;
+ cl_mem cl_hChrFilter;
+ cl_mem cl_hChrFilterPos;
+ cl_mem cl_vLumFilter;
+ cl_mem cl_vLumFilterPos;
+ cl_mem cl_vChrFilter;
+ cl_mem cl_vChrFilterPos;
+
+ cl_mem cl_intermediaBuf;
+
+ cl_mem cl_src;
+ cl_mem cl_dst;
+} ScaleContext;
+
+/* Initialize the OpenCL scaler for the given source/destination dimensions. */
+void scale_init(int, int, int, int);
+/* Release the scaler's OpenCL resources.
+ * Fixed: '()' declares an unprototyped function in C; '(void)' is the
+ * correct "takes no arguments" prototype. */
+void scale_release(void);
+/* Scale one frame from inbuf to outbuf; returns nonzero on success. */
+int scale_run(cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height);
+#endif
+#endif
diff --git a/libhb/scale_kernel.c b/libhb/scale_kernel.c new file mode 100644 index 000000000..4d0b285c3 --- /dev/null +++ b/libhb/scale_kernel.c @@ -0,0 +1,215 @@ +
+#ifdef USE_OPENCL
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <windows.h>
+#include <time.h>
+#include "scale.h"
+#include "openclwrapper.h"
+
+/*
+ * OCLCHECK: invoke an OpenCL API call, storing its result in the local
+ * 'status' variable of the enclosing function; on failure it prints the
+ * call name and error code, asserts, and returns 'status' FROM THE
+ * ENCLOSING FUNCTION.
+ * NOTE(review): expands to two statements without a do{}while(0)
+ * wrapper, so it is unsafe inside an unbraced if/else; callers must
+ * declare 'cl_int status' and must treat a non-CL_SUCCESS return of the
+ * enclosing function as failure.
+ */
+#define OCLCHECK( method, ...) \
+ status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \
+ printf(" error %s %d\n",# method, status); assert(0); return status; }
+
+/*
+ * CREATEBUF: create an OpenCL buffer in kenv->context and store the
+ * handle in 'out'; on failure prints a message and returns -1 from the
+ * enclosing function.
+ * NOTE(review): this -1 failure return is inconsistent with
+ * CreateCLBuffer()'s success return of 1 -- a caller testing '!st'
+ * will not see -1 as failure (see av_scale_frame_func).
+ */
+#define CREATEBUF( out, flags, size, ptr)\
+ out = clCreateBuffer( kenv->context, (flags), (size), ptr, &status );\
+ if( status != CL_SUCCESS ) { printf( "clCreateBuffer faild %d\n", status ); return -1; }
+
+/* Size of the userdata pointer array handed to hb_run_kernel(). */
+ #define CL_PARAM_NUM 20
+
+/****************************************************************************************************************************/
+/*************************Combine the hscale and yuv2plane into scaling******************************************************/
+/****************************************************************************************************************************/
+/*
+ * CreateCLBuffer: upload the horizontal/vertical scaler filter
+ * coefficient and position tables from the ScaleContext into read-only
+ * OpenCL buffers (copied from host memory at creation time).
+ *
+ * Returns 1 on success.  NOTE(review): on any allocation failure the
+ * CREATEBUF macro returns -1 directly from this function, so callers
+ * must check 'st != 1', not '!st'.
+ */
+static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv )
+{
+ cl_int status;
+
+ /* Horizontal filter tables are only needed when the fast (unfiltered)
+  * horizontal scalers are not in use. */
+ if(!c->hyscale_fast || !c->hcscale_fast)
+ {
+ CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter);
+ CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos);
+ CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter);
+ CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos);
+ }
+ /* Vertical coefficient tables are only used by the multi-tap
+  * ("all") vertical kernel; position tables are always needed. */
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ {
+ CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter);
+ CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter);
+ }
+ CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos);
+ CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos);
+
+ return 1;
+}
+
+/*
+ * av_scale_frame_func: kernel-wrapper callback run by hb_run_kernel().
+ * Performs the horizontal scale pass into an intermediate buffer, then
+ * the vertical scale pass into the destination buffer.
+ *
+ * userdata layout (packed by av_scale_frame):
+ *   [0] ScaleContext*, [1] dst cl_mem, [2] src cl_mem,
+ *   [3] src strides (int[2]: luma, chroma), [4] dst strides (same),
+ *   [5] should_dither (currently unused here).
+ *
+ * Returns 1 on success, -1 or a CL error code on failure.
+ */
+int av_scale_frame_func( void **userdata, KernelEnv *kenv )
+{
+ ScaleContext *c = (ScaleContext *)userdata[0];
+
+ c->cl_src = (cl_mem)userdata[2];
+ c->cl_dst = (cl_mem)userdata[1];
+
+ /* frame size */
+ int *tmp = (int *)userdata[3];
+ int srcStride = tmp[0];
+ int srcChrStride = tmp[1];
+ int srcW = c->srcW;
+ int srcH = c->srcH;
+
+ tmp = (int *)userdata[4];
+ int dstStride = tmp[0];
+ int dstChrStride = tmp[1];
+ int dstW = c->dstW;
+ int dstH = c->dstH;
+
+ /* local variable */
+ cl_int status;
+ size_t global_work_size[2];
+
+ int intermediaSize;
+
+ /* BUG FIX: CreateCLBuffer returns 1 on success, but on failure the
+  * CREATEBUF macro inside it returns -1, which the old 'if( !st )'
+  * test treated as success.  Treat anything other than 1 as failure. */
+ int st = CreateCLBuffer(c,kenv);
+ if( st != 1 )
+ {
+ printf( "CreateBuffer[%s] failed %d\n", "scale_opencl",st );
+ return -1;
+ }
+
+ /* Intermediate surface: horizontally-scaled planes at destination
+  * width but source height, stored as 16-bit samples. */
+ intermediaSize = dstStride * srcH + dstChrStride * srcH;
+
+ /* NOTE(review): this buffer is released at the end of this function,
+  * but any OCLCHECK failure below returns early and leaks it. */
+ CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL);
+
+ /* NOTE(review): kernels and their constant arguments are cached in
+  * function-local statics -- not thread-safe, and wrong if a second
+  * ScaleContext with different filter sizes ever uses this path. */
+ static int init_chr_status = 0;
+ static cl_kernel chr_kernel;
+
+ if(init_chr_status == 0){
+
+ if(!(c->flags & 1))
+ {
+ chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );
+ /* Constant kernel arguments: filter coefficient/position tables. */
+ OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter);
+ OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos);
+ OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize);
+ OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter);
+ OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos);
+ OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize);
+ }
+
+ /* NOTE(review): when (c->flags & 1) is set, chr_kernel is never
+  * created above, yet the argument setup below still runs on an
+  * uninitialized handle -- TODO confirm the fast-hscale path. */
+ /* Set the dimension/stride arguments. */
+ OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride);
+ OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride);
+ init_chr_status = 1;
+ }
+
+ kenv->kernel = chr_kernel;
+ /* Per-frame arguments: output (intermediate) and input buffers. */
+ OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);
+ OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src);
+ /* Run the horizontal kernel over the chroma-sized grid. */
+ global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;
+ global_work_size[1] = c->chrSrcH;
+
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+
+ static int init_lum_status = 0;
+ static cl_kernel lum_kernel;
+
+ if( init_lum_status == 0 ){
+ /* Vertical pass: pick the multi-tap or the fast (1-tap) kernel. */
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );
+ else
+ lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );
+
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
+ {
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter);
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize);
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter);
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize);
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH);
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride);
+ }else{
+
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW);
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH);
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW);
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH);
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride);
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride);
+ }
+ init_lum_status = 1;
+ }
+
+ kenv->kernel = lum_kernel;
+ /* Per-frame arguments: final destination and intermediate source. */
+ OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst);
+ OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);
+
+ /* Run the vertical kernel over the destination chroma grid. */
+ global_work_size[0] = c->chrDstW;
+ global_work_size[1] = c->chrDstH;
+
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
+
+ clReleaseMemObject( c->cl_intermediaBuf );
+
+ return 1;
+}
+
+/*
+ * av_scale_frame: entry point for the OpenCL frame scaler.  Registers
+ * the scale kernel wrapper on first use, then invokes it through
+ * hb_run_kernel() with the frame buffers and stride information packed
+ * into a userdata array (see av_scale_frame_func for the layout).
+ *
+ * dst/src are cl_mem handles passed as void*; srcStride/dstStride point
+ * to {luma, chroma} stride pairs.  should_dither is forwarded in
+ * userdata[5] but is not consumed by the kernel wrapper.
+ *
+ * Fixed: log messages said "faild" and the run-failure message named
+ * "scale_opencl_func", which does not match the registered kernel name
+ * "scale_opencl".
+ */
+void av_scale_frame(ScaleContext *c,
+ void *dst,
+ void *src,
+ int *srcStride,
+ int *dstStride,
+ int *should_dither)
+{
+
+ /* One-time registration flag.  NOTE(review): not thread-safe. */
+ static int regflg = 0;
+ void *userdata[CL_PARAM_NUM];
+ userdata[0] = (void *)c;
+ userdata[1] = (void *)dst;
+ userdata[2] = (void *)src;
+ userdata[3] = (void *)srcStride;
+ userdata[4] = (void *)dstStride;
+ userdata[5] = (void *)should_dither;
+
+ if( regflg==0 )
+ {
+ int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func);
+ if( !st )
+ {
+ printf( "register kernel[%s] failed %d\n", "scale_opencl",st );
+ return;
+ }
+ regflg++;
+ }
+
+ if( !hb_run_kernel( "scale_opencl", userdata ))
+ {
+ printf("run kernel function[%s] failed\n", "scale_opencl" );
+ return;
+ }
+}
+
+#endif
diff --git a/libhb/scale_kernel.h b/libhb/scale_kernel.h new file mode 100644 index 000000000..29562bdaf --- /dev/null +++ b/libhb/scale_kernel.h @@ -0,0 +1,6 @@ +#ifndef _H_SCALE_KERNEL_H +#define _H_SCALE_KERNEL_H +#ifdef USE_OPENCL +void av_scale_frame(ScaleContext *c, void *dst, void *src, int *srcStride, int *dstStride, int *should_dither); +#endif +#endif diff --git a/libhb/stream.c b/libhb/stream.c index 46b4cb71e..0790394a8 100644 --- a/libhb/stream.c +++ b/libhb/stream.c @@ -610,7 +610,7 @@ static int hb_stream_get_type(hb_stream_t *stream) if ( fread(buf, 1, sizeof(buf), stream->file_handle) == sizeof(buf) ) { -#ifdef USE_OPENCL +#ifdef USE_HWD if ( hb_get_gui_info(&hb_gui, 1) || (hb_get_gui_info(&hb_gui, 3) == 0) ) return 0; #endif @@ -1101,23 +1101,26 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title) { hb_log( "transport stream missing PCRs - using video DTS instead" ); } -#ifdef USE_OPENCL +#ifdef USE_HWD hb_va_dxva2_t * dxva2 = NULL; dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); if (dxva2) { - title->uvd_support = 1; + title->hwd_support = 1; hb_va_close(dxva2); dxva2 = NULL; } else - title->uvd_support = 0; + title->hwd_support = 0; +#else + title->hwd_support = 0; +#endif +#ifdef USE_OPENCL if (TestGPU() == 0) title->opencl_support = 1; else title->opencl_support = 0; #else - title->uvd_support = 0; title->opencl_support = 0; #endif // Height, width, rate and aspect ratio information is filled in @@ -5671,24 +5674,26 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title ) chapter->seconds = title->seconds; hb_list_add( title->list_chapter, chapter ); } - -#ifdef USE_OPENCL +#ifdef USE_HWD hb_va_dxva2_t * dxva2 = NULL; dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); if (dxva2) { - title->uvd_support = 1; + title->hwd_support = 1; hb_va_close(dxva2); dxva2 = NULL; } else - title->uvd_support = 0; + title->hwd_support = 0; +#else + title->hwd_support = 0; +#endif 
+#ifdef USE_OPENCL if (TestGPU() == 0) title->opencl_support = 1; else title->opencl_support = 0; #else - title->uvd_support = 0; title->opencl_support = 0; #endif diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c index ff6940497..548dfe144 100644 --- a/libhb/vadxva2.c +++ b/libhb/vadxva2.c @@ -10,31 +10,18 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ - #include "vadxva2.h" + +#ifdef USE_OPENCL #include "CL/cl.h" #include "oclnv12toyuv.h" +#include "scale.h" -static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); -static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); -static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ); -static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 ); -static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ); -static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ); -static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ); -static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ); -static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ); -static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ); -static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ); -static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ); -static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ); -static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ); - -#ifdef USE_OPENCL int TestGPU() { int status = 1; - unsigned int i; + unsigned int i, j; + cl_device_id device; cl_uint numPlatforms = 0; status = clGetPlatformIDs(0,NULL,&numPlatforms); if(status != 0) @@ -59,24 +46,55 @@ int TestGPU() sizeof (pbuff), pbuff, NULL); - if (status) - continue; - status = clGetDeviceIDs(platforms[i], + if (status) + continue; + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU , 0 , NULL , &numDevices); - if (status != CL_SUCCESS) - continue; - if(numDevices) - 
break; + + cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); + for (j = 0; j < numDevices; j++) + { + char dbuff[100]; + status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); + device = devices[j]; + if(!strcmp(dbuff, "Advanced Micro Devices, Inc.")) + { + return 0; + } + } + + if (status != CL_SUCCESS) + continue; + if( numDevices) + break; } free(platforms); } end: - return status; + return -1; } #endif + +#ifdef USE_HWD + +static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); +static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); +static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 ); +static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ); +static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ); +static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ); +static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ); +static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ); +static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ); +static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ); +static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ); +static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ); /** * It destroys a Direct3D device manager */ @@ -640,6 +658,35 @@ static void hb_copy_from_nv12( uint8_t *dst, uint8_t *src[2], size_t src_pitch[2 } } } + +#ifdef USE_OPENCL +void hb_init_filter( cl_mem src, int srcwidth, int srcheight, uint8_t* dst, int dstwidth, int dstheight, int *crop ) +{ + T_FilterLink fl = {0}; + int STEP = srcwidth * srcheight * 3 / 2; + int OUTSTEP = dstwidth * dstheight * 3 / 2; + 
int HEIGHT = srcheight; + int LINESIZEY = srcwidth; + int LINESIZEUV = srcwidth / 2; + + cl_mem cl_outbuf; + + if( !hb_create_buffer( &(cl_outbuf), CL_MEM_WRITE_ONLY, STEP ) ) + { + hb_log("av_create_buffer cl_outbuf Error\n"); + return; + } + + fl.cl_outbuf = cl_outbuf; + + scale_run( src, fl.cl_outbuf, LINESIZEY, LINESIZEUV, HEIGHT ); + + hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP ); + CL_FREE( cl_outbuf ); + + return; +} +#endif /** * lock frame data form surface. * nv12 to yuv with opencl and with C reference @@ -671,20 +718,16 @@ int hb_va_extract( hb_va_dxva2_t *dxva2, uint8_t *dst, AVFrame *frame, int job_w #ifdef USE_OPENCL if( ( dxva2->width > job_w || dxva2->height > job_h ) && (TestGPU() == 0) && (hb_get_gui_info(&hb_gui, 2) == 1)) { -/* int i; - uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 ); - for( i = 0; i < dxva2->height; i++ ) - { - memcpy( tmp+i*dxva2->width, plane[0]+i*lock.Pitch, dxva2->width ); - if( i<dxva2->height>>1 ) - memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, plane[1]+i*lock.Pitch, dxva2->width ); - } -*/ hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 ); - //hb_ocl_nv12toyuv( tmp, dxva2->width, dxva2->height, crop, dxva2 ); - hb_ocl_scale( dxva2->cl_mem_yuv, NULL, dst, dxva2->width - ( crop[2] + crop[3] ), dxva2->height - ( crop[0] + crop[1] ), job_w, job_h, os ); - //free( tmp ); - } + + static int init_flag = 0; + if(init_flag == 0){ + scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h ); + init_flag = 1; + } + + hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop ); + } else #endif { @@ -785,20 +828,20 @@ void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ) enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt ) { int i; - static const char *ppsz_name[PIX_FMT_NB] = - { - 
[PIX_FMT_VDPAU_H264] = "PIX_FMT_VDPAU_H264", - [PIX_FMT_VAAPI_IDCT] = "PIX_FMT_VAAPI_IDCT", - [PIX_FMT_VAAPI_VLD] = "PIX_FMT_VAAPI_VLD", - [PIX_FMT_VAAPI_MOCO] = "PIX_FMT_VAAPI_MOCO", - [PIX_FMT_DXVA2_VLD] = "PIX_FMT_DXVA2_VLD", - [PIX_FMT_YUYV422] = "PIX_FMT_YUYV422", - [PIX_FMT_YUV420P] = "PIX_FMT_YUV420P", + static const char *ppsz_name[AV_PIX_FMT_NB] = + { + [AV_PIX_FMT_VDPAU_H264] = "AV_PIX_FMT_VDPAU_H264", + [AV_PIX_FMT_VAAPI_IDCT] = "AV_PIX_FMT_VAAPI_IDCT", + [AV_PIX_FMT_VAAPI_VLD] = "AV_PIX_FMT_VAAPI_VLD", + [AV_PIX_FMT_VAAPI_MOCO] = "AV_PIX_FMT_VAAPI_MOCO", + [AV_PIX_FMT_DXVA2_VLD] = "AV_PIX_FMT_DXVA2_VLD", + [AV_PIX_FMT_YUYV422] = "AV_PIX_FMT_YUYV422", + [AV_PIX_FMT_YUV420P] = "AV_PIX_FMT_YUV420P", }; - for( i = 0; pi_fmt[i] != PIX_FMT_NONE; i++ ) + for( i = 0; pi_fmt[i] != AV_PIX_FMT_NONE; i++ ) { hb_log( "dxva2:Available decoder output format %d (%s)", pi_fmt[i], ppsz_name[pi_fmt[i]] ? : "Unknown" ); - if( pi_fmt[i] == PIX_FMT_DXVA2_VLD ) + if( pi_fmt[i] == AV_PIX_FMT_DXVA2_VLD ) { return pi_fmt[i]; } @@ -825,3 +868,4 @@ int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFram return HB_WORK_OK; } +#endif diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h index 7a2af4862..eeab5bec8 100644 --- a/libhb/vadxva2.h +++ b/libhb/vadxva2.h @@ -16,6 +16,7 @@ #ifndef VA_DXVA2_H #define VA_DXVA2_H +#ifdef USE_HWD #include "hbffmpeg.h" #include "d3d9.h" #include "libavcodec/dxva2.h" @@ -143,6 +144,25 @@ typedef struct #endif } hb_va_dxva2_t; +typedef struct FilterLink_T +{ +#ifdef USE_OPENCL + cl_mem cl_inbuf; + cl_mem cl_outbuf; +#endif + uint8_t *mem_inbuf; + uint8_t *mem_outbuf; + int width; + int height; + int linesizeY; + int linesizeUV; + int inmemdataflag; + int outmemdataflag; + int incldataflag; + int outcldataflag; + int framenum; + int outputSize; +} T_FilterLink; static const hb_d3d_format_t d3d_formats[] = { @@ -192,3 +212,4 @@ void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ); void hb_va_close( hb_va_dxva2_t *dxva2 ); 
#endif +#endif diff --git a/libhb/work.c b/libhb/work.c index c8057f0e4..6625d5ff7 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -554,18 +554,17 @@ static void do_job( hb_job_t * job ) job->list_work = hb_list_init(); hb_log( "starting job" ); -#ifdef USE_OPENCL - if ( job->use_opencl || job->use_uvd) + if ( job->use_opencl || job->use_hwd) { - /* init opencl environment */ hb_log( "Using GPU : Yes.\n" ); - job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I."); + /* init opencl environment */ +#ifdef USE_OPENCL + if ( job->use_opencl ) + job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I."); +#endif } else hb_log( "Using GPU : NO.\n" ); -#else - hb_log( "Using GPU : NO.\n" ); -#endif /* Look for the scanned subtitle in the existing subtitle list * select_subtitle implies that we did a scan. */ if( !job->indepth_scan && interjob->select_subtitle ) @@ -1021,10 +1020,11 @@ static void do_job( hb_job_t * job ) title->video_codec_param = AV_CODEC_ID_MPEG2VIDEO; } #endif -#ifdef USE_OPENCL - if ( /*job->use_opencl &&*/ hb_use_dxva( title ) && (TestGPU() == 0) && job->use_uvd ) - { - vcodec = WORK_DECAVCODECVACCL; +#ifdef USE_HWD + if ( /*job->use_opencl &&*/ hb_use_dxva( title ) && job->use_hwd ) + { + //vcodec = WORK_DECAVCODECVACCL; + job->use_hw_decode = 1; } #endif hb_list_add( job->list_work, ( w = hb_get_work( vcodec ) ) ); diff --git a/make/configure.py b/make/configure.py index 598ef754b..0fc32de0f 100644 --- a/make/configure.py +++ b/make/configure.py @@ -1087,6 +1087,8 @@ def createCLI(): grp.add_option( '--enable-ff-mpeg2', default=False, action='store_true', help=h ) h = IfHost( 'enable OpenCL features', '*-*-*', none=optparse.SUPPRESS_HELP ).value grp.add_option( '--enable-opencl', default=False, action='store_true', help=h ) + h = IfHost( 'enable HWD features', '*-*-*', none=optparse.SUPPRESS_HELP ).value + grp.add_option( '--enable-hwd', default=False, action='store_true', help=h ) cli.add_option_group( grp ) @@ -1521,6 +1523,7 @@ int main () 
doc.add( 'FEATURE.gst', int( not options.disable_gst )) doc.add( 'FEATURE.ff.mpeg2', int( options.enable_ff_mpeg2 )) doc.add( 'FEATURE.opencl', int( options.enable_opencl )) + doc.add( 'FEATURE.hwd', int( options.enable_hwd )) doc.add( 'FEATURE.xcode', int( not (Tools.xcodebuild.fail or options.disable_xcode or options.cross) )) if not Tools.xcodebuild.fail and not options.disable_xcode: diff --git a/test/module.defs b/test/module.defs index 246b62a94..ba7eaf5a4 100644 --- a/test/module.defs +++ b/test/module.defs @@ -48,6 +48,9 @@ ifeq (1,$(FEATURE.opencl)) TEST.GCC.l += OpenCL TEST.GCC.D += USE_OPENCL endif +ifeq (1,$(FEATURE.hwd)) + TEST.GCC.D += USE_HWD +endif TEST.GCC.l += pthreadGC2 iconv ws2_32 TEST.GCC.D += PTW32_STATIC_LIB TEST.GCC.args.extra.exe++ += -static diff --git a/test/test case/handbrake_test.txt b/test/test case/handbrake_test.txt index 47fb0dd8e..a0e9397a1 100644 --- a/test/test case/handbrake_test.txt +++ b/test/test case/handbrake_test.txt @@ -7,74 +7,61 @@ #- the names are used as test names and file names
# universal
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-preset=fast --x264-profile=baseline --h264-level="3.0" --verbose=1
# iPod
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -I -w 320 -l 240 -e x264 -b 700 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x level=30:bframes=0:weightp=0:cabac=0:ref=1:vbv-maxrate=768:vbv-bufsize=2000:analyse=all:me=umh:no-fast-pskip=1:subq=6:8x8dct=0:trellis=0 --verbose=1
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -I -P -U -w 320 -l 180 --modulus 2 -e x264 -q 22 -r 30 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=baseline --h264-level="1.3" --verbose=1
# iPhone_iPod_Touch
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 320 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
-
-
-# iPhone_4
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -4 -P -U -w 960 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
# iPad
+ -i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1024 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
-
-# Apple_TV
-
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:b-pyramid=none:b-adapt=2:weightb=0:trellis=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1
-
-# Apple_TV_2
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1280 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1
-
-
-# Android_Mid
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 270 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
-
-
-# Android_High
- -i "infile" -t 1 -o "outfile.mp4" -f mp4 -w 720 --loose-anamorphic -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x weightp=0:cabac=0 --verbose=1
+# AppleTV
+ -i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U -w 960 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" -x cabac=0:ref=2:b-pyramid=none:weightb=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1
+# AppleTV2
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="3.1" --verbose=1
-# Normal_1080p_to_1080p_fixed_qp
--i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# AppleTV3
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 -r 30 --pfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\2-1-chapters.csv" --x264-profile=high --h264-level="4.0" --verbose=1
+# Android
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 128 -D 0 --gain 0 --audio-fallback ffac3 --x264-profile=main --h264-level="2.2" --verbose=1
-# Normal_1080p_to_720p_fixed_qp
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# Android_Tablet
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -6 dpl2 -R Auto -B 128 -D 0 --gain 0 --audio-fallback ffac3 --x264-profile=main --h264-level="3.1" --verbose=1
-# Normal_1080p_to_1080p_13_mbps
--i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -b 13000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
+# Normal_to_480p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
+# Normal_to_720p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
-# Normal_1080p_to_720p_6_mbps
- -i "infile" -t 1 -c 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1
-# high_1080p_to_1080p_fixed_qp
--i "infile" -t 1 -o "high_1080p.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# Normal_to_1080p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
+# Normal_to_1080p_qp
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.mp4" -f mp4 -P -U -w 1920 -l 1080 --custom-anamorphic --display-width 1920 --keep-display-aspect --modulus 2 -e x264 -q 20 --vfr -a 1 -E faac -6 dpl2 -R Auto -B 160 -D 0 --gain 0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-preset=veryfast --x264-profile=main --h264-level="4.0" --verbose=1
-# high_1080p_to_720p_fixed qp
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_480p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 720 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# high_1080p_to_1080p_13_mbps
- -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -b 13000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_720p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1280 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# high_1080p_to_720p_6mbps
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1
+# High Profile_to_1080p
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 --loose-anamorphic --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
-# strange_resolution
--i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 1078 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1
+# High Profile_to_1080p_qp
+-i "infile" -t 1 --angle 1 -c 1 -o "outfile.m4v" -f mp4 -4 -P -U --decomb -w 1920 -l 1080 --custom-anamorphic --display-width 720 --pixel-aspect 720:480 --modulus 2 -e x264 -q 20 --vfr -a 1,1 -E faac,copy:ac3 -6 dpl2,auto -R Auto,Auto -B 160,0 -D 0,0 --gain 0,0 --audio-fallback ffac3 --markers="C:\Users\AMDComal\AppData\Local\Temp\1-1-chapters.csv" --x264-profile=high --h264-level="4.1" --verbose=1
#-end of script
diff --git a/test/test case/readme.txt b/test/test case/readme.txt index 8c182e26c..87c91201f 100644 --- a/test/test case/readme.txt +++ b/test/test case/readme.txt @@ -37,7 +37,9 @@ The handbrake_test.txt is a "script" file that contains the individual tests to blank lines are ignored.
-To enable UVD decoding, add -P -U to each execution line in handbrake_test.txt.
+
+To enable OpenCL, add -P to each execution line in handbrake_test.txt.
+To enable hardware decoding, add -U to each execution line in handbrake_test.txt.
Example:
# universal
-i "infile" -t 1 -o "outfile.m4v" -P -U -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1
diff --git a/test/test.c b/test/test.c index 82797fd6e..42c53ab8d 100644 --- a/test/test.c +++ b/test/test.c @@ -137,7 +137,7 @@ static int64_t stop_at_pts = 0; static int stop_at_frame = 0; static uint64_t min_title_duration = 10; static int use_opencl = 0; -static int use_uvd = 0; +static int use_hwd = 0; /* Exit cleanly on Ctrl-C */ static volatile int die = 0; @@ -218,7 +218,8 @@ int main( int argc, char ** argv ) h = hb_init( debug, update ); hb_dvd_set_dvdnav( dvdnav ); #ifdef USE_OPENCL - hb_get_opencl_env(); + if ( use_opencl ) + hb_get_opencl_env(); #endif /* Show version */ fprintf( stderr, "%s - %s - %s\n", @@ -260,7 +261,7 @@ int main( int argc, char ** argv ) titleindex = 0; } - hb_set_gui_info(&hb_gui, use_uvd, use_opencl, titleindex); + hb_set_gui_info(&hb_gui, use_hwd, use_opencl, titleindex); hb_scan( h, input, titleindex, preview_count, store_previews, min_title_duration * 90000LL ); /* Wait... */ @@ -429,10 +430,10 @@ static void PrintTitleInfo( hb_title_t * title, int feature ) fprintf( stderr, " + support opencl: yes\n"); else fprintf( stderr, " + support opencl: no\n"); - if (title->uvd_support) - fprintf( stderr, " + support uvd: yes\n"); + if (title->hwd_support) + fprintf( stderr, " + support hwd: yes\n"); else - fprintf( stderr, " + support uvd: no\n"); + fprintf( stderr, " + support hwd: no\n"); fprintf( stderr, " + chapters:\n" ); for( i = 0; i < hb_list_count( title->list_chapter ); i++ ) { @@ -1412,9 +1413,9 @@ static int HandleEvents( hb_handle_t * h ) job->maxWidth = maxWidth; if (maxHeight) job->maxHeight = maxHeight; - if (use_uvd) + if (use_hwd) { - job->use_uvd = use_uvd; + job->use_hwd = use_hwd; } switch( anamorphic_mode ) @@ -1588,13 +1589,8 @@ static int HandleEvents( hb_handle_t * h ) filter_str = hb_strdup_printf("%d:%d:%d:%d:%d:%d", job->width, job->height, job->crop[0], job->crop[1], job->crop[2], job->crop[3] ); - -#ifdef USE_OPENCL - if ( use_opencl ) - filter = hb_filter_init( HB_FILTER_CROP_SCALE_ACCL ); - else 
-#endif - filter = hb_filter_init( HB_FILTER_CROP_SCALE ); + + filter = hb_filter_init( HB_FILTER_CROP_SCALE ); hb_add_filter( job, filter, filter_str ); free( filter_str ); @@ -3251,7 +3247,7 @@ static int ParseOptions( int argc, char ** argv ) { "optimize", no_argument, NULL, 'O' }, { "ipod-atom", no_argument, NULL, 'I' }, { "use-opencl", no_argument, NULL, 'P' }, - { "use-uvd", no_argument, NULL, 'U' }, + { "use-hwd", no_argument, NULL, 'U' }, { "title", required_argument, NULL, 't' }, { "min-duration",required_argument, NULL, MIN_DURATION }, @@ -3416,7 +3412,7 @@ static int ParseOptions( int argc, char ** argv ) use_opencl = 1; break; case 'U': - use_uvd = 1; + use_hwd = 1; break; case 't': diff --git a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs index 5c9f166e0..f9053a8bb 100644 --- a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs +++ b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs @@ -88,7 +88,7 @@ namespace HandBrake.ApplicationServices.Model this.IncludeChapterMarkers = task.IncludeChapterMarkers;
this.IPod5GSupport = task.IPod5GSupport;
this.OpenCLSupport = task.OpenCLSupport;
- this.UVDSupport = task.UVDSupport;
+ this.HWDSupport = task.HWDSupport;
this.KeepDisplayAspect = task.KeepDisplayAspect;
this.LargeFile = task.LargeFile;
this.MaxHeight = task.MaxHeight;
@@ -197,9 +197,9 @@ namespace HandBrake.ApplicationServices.Model public bool OpenCLSupport { get; set; }
/// <summary>
- /// Gets or sets a value indicating whether UVDSupport.
+ /// Gets or sets a value indicating whether HWDSupport.
/// </summary>
- public bool UVDSupport { get; set; }
+ public bool HWDSupport { get; set; }
#endregion
#region Picture
diff --git a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs index 466a33aa1..00488eac8 100644 --- a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs +++ b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs @@ -121,9 +121,9 @@ namespace HandBrake.ApplicationServices.Parsing public int OpenCLSupport { get; set; }
/// <summary>
- /// Gets or sets the UVD
+ /// Gets or sets the HWD
/// </summary>
- public int UVDSupport { get; set; }
+ public int HWDSupport { get; set; }
#endregion
/// <summary>
@@ -236,14 +236,14 @@ namespace HandBrake.ApplicationServices.Parsing }
nextLine = output.ReadLine();
- m = Regex.Match(nextLine, @"^ \+ support uvd:");
+ m = Regex.Match(nextLine, @"^ \+ support hwd:");
if (m.Success)
{
- temp = nextLine.Replace("+ support uvd:", string.Empty).Trim();
+ temp = nextLine.Replace("+ support hwd:", string.Empty).Trim();
if (string.Compare(temp, "yes") == 0)
- thisTitle.UVDSupport = 1;
+ thisTitle.HWDSupport = 1;
else
- thisTitle.UVDSupport = 0;
+ thisTitle.HWDSupport = 0;
}
thisTitle.Chapters.AddRange(Chapter.ParseList(output));
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs index 5cb0d4768..618dd43cc 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs @@ -102,7 +102,7 @@ namespace HandBrake.ApplicationServices.Utilities profile.Height = work.Height.HasValue ? work.Height.Value : 0;
profile.IPod5GSupport = work.IPod5GSupport;
profile.OpenCLGSupport = work.OpenCLSupport;
- profile.UVDSupport = work.UVDSupport;
+ profile.HWDSupport = work.HWDSupport;
profile.IncludeChapterMarkers = work.IncludeChapterMarkers;
profile.KeepDisplayAspect = work.KeepDisplayAspect;
profile.LargeFile = work.LargeFile;
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs index 260abfe54..e22345134 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs @@ -142,7 +142,7 @@ namespace HandBrake.ApplicationServices.Utilities AddEncodeElement(xmlWriter, "Mp4LargeFile", "integer", parsed.LargeFile ? "1" : "0");
AddEncodeElement(xmlWriter, "Mp4iPodCompatible", "integer", parsed.IPod5GSupport ? "1" : "0");
AddEncodeElement(xmlWriter, "OpenCLSupport", "integer", parsed.OpenCLSupport ? "1" : "0");
- AddEncodeElement(xmlWriter, "UVDSupport", "integer", parsed.UVDSupport ? "1" : "0");
+ AddEncodeElement(xmlWriter, "HWDSupport", "integer", parsed.HWDSupport ? "1" : "0");
AddEncodeElement(xmlWriter, "PictureAutoCrop", "integer", "1");
AddEncodeElement(xmlWriter, "PictureBottomCrop", "integer", parsed.Cropping.Bottom.ToString());
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs index 69cbfc258..6cdf1d900 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs @@ -246,7 +246,7 @@ namespace HandBrake.ApplicationServices.Utilities if (task.OpenCLSupport)
query += " -P ";
- if (task.UVDSupport)
+ if (task.HWDSupport)
query += " -U ";
return query;
diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs index a167801df..9655d6629 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs @@ -57,7 +57,7 @@ namespace HandBrake.ApplicationServices.Utilities Match largerMp4 = Regex.Match(input, @" -4");
Match ipodAtom = Regex.Match(input, @" -I");
Match openclSupport = Regex.Match(input, @" -P");
- Match uvdSupport = Regex.Match(input, @" -U");
+ Match hwdSupport = Regex.Match(input, @" -U");
// Picture Settings Tab
Match width = Regex.Match(input, @"-w ([0-9]+)");
@@ -158,7 +158,7 @@ namespace HandBrake.ApplicationServices.Utilities parsed.IPod5GSupport = ipodAtom.Success;
parsed.OptimizeMP4 = optimizeMP4.Success;
parsed.OpenCLSupport = openclSupport.Success;
- parsed.UVDSupport = uvdSupport.Success;
+ parsed.HWDSupport = hwdSupport.Success;
#endregion
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs index 74f1683a9..06bf022f3 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs @@ -1448,7 +1448,7 @@ namespace HandBrake.Interop nativeJob.mp4_optimize = profile.Optimize ? 1 : 0;
nativeJob.ipod_atom = profile.IPod5GSupport ? 1 : 0;
nativeJob.opencl_support = profile.OpenCLGSupport ? 1 : 0;
- nativeJob.uvd_support = profile.UVDSupport ? 1 : 0;
+ nativeJob.hwd_support = profile.HWDSupport ? 1 : 0;
if (title.AngleCount > 1)
{
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs index a761780fa..562531083 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs @@ -149,7 +149,7 @@ namespace HandBrake.Interop.HbLib public int opencl_support;
/// int
- public int uvd_support;
+ public int hwd_support;
/// int
public int indepth_scan;
diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs index 9667d5e6e..937c776a7 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs @@ -28,7 +28,7 @@ namespace HandBrake.Interop.Model.Encoding public bool Optimize { get; set; }
public bool IPod5GSupport { get; set; }
public bool OpenCLGSupport { get; set; }
- public bool UVDSupport { get; set; }
+ public bool HWDSupport { get; set; }
public int Width { get; set; }
public int Height { get; set; }
@@ -85,7 +85,7 @@ namespace HandBrake.Interop.Model.Encoding Optimize = this.Optimize,
IPod5GSupport = this.IPod5GSupport,
OpenCLGSupport = this.OpenCLGSupport,
- UVDSupport = this.UVDSupport,
+ HWDSupport = this.HWDSupport,
Width = this.Width,
Height = this.Height,
diff --git a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs index e814e897c..ad09f3043 100644 --- a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs +++ b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs @@ -377,7 +377,7 @@ namespace HandBrakeWPF.ViewModels this.CurrentTask.OptimizeMP4 = selectedPreset.Task.OptimizeMP4;
this.CurrentTask.IPod5GSupport = selectedPreset.Task.IPod5GSupport;
this.CurrentTask.OpenCLSupport = selectedPreset.Task.OpenCLSupport;
- this.CurrentTask.UVDSupport = selectedPreset.Task.UVDSupport;
+ this.CurrentTask.HWDSupport = selectedPreset.Task.HWDSupport;
this.SelectedOutputFormat = selectedPreset.Task.OutputFormat;
// Tab Settings
@@ -1697,7 +1697,7 @@ namespace HandBrakeWPF.ViewModels {
this.SupportOpenCL = false;
}
- if (this.selectedTitle.UVDSupport == 0)
+ if (this.selectedTitle.HWDSupport == 0)
{
this.SupportHardwareDecoding = true;
}
diff --git a/win/CS/HandBrakeWPF/Views/MainView.xaml b/win/CS/HandBrakeWPF/Views/MainView.xaml index 295ebb4c1..0f85a2b73 100644 --- a/win/CS/HandBrakeWPF/Views/MainView.xaml +++ b/win/CS/HandBrakeWPF/Views/MainView.xaml @@ -482,11 +482,11 @@ Converter={StaticResource boolToVisConverter},
ConverterParameter=true}"
/>
- <CheckBox Name="UVD"
+ <CheckBox Name="HWD"
Margin="8,0,0,0"
VerticalAlignment="Center"
- Content="UVD Support"
- IsChecked="{Binding Path=CurrentTask.UVDSupport}" IsEnabled="True"
+ Content="HWD Support"
+ IsChecked="{Binding Path=CurrentTask.HWDSupport}" IsEnabled="True"
Visibility="{Binding SupportHardwareDecoding,
Converter={StaticResource boolToVisConverter},
ConverterParameter=true}"
|