diff options
author | Rodeo <[email protected]> | 2013-11-08 21:21:02 +0000 |
---|---|---|
committer | Rodeo <[email protected]> | 2013-11-08 21:21:02 +0000 |
commit | 43f0bc9d538c86ea75a5cd627a81452e9d76b825 (patch) | |
tree | a6bde5c2174543e4ea282ab26b6a2c76829105eb /libhb | |
parent | d0a2953efbce340e34a971b9481024a51ae52383 (diff) |
OpenCL: use the new library loading architecture for all OpenCL code.
An OpenCL SDK is no longer needed to build OpenCL support.
Note: as a result, the --enable-opencl configure option is removed.
Also, libOpenCL is no longer needed to run the application (it is still necessary to use OpenCL features, of course).
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5886 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb')
-rw-r--r-- | libhb/common.h | 61 | ||||
-rw-r--r-- | libhb/cropscale.c | 125 | ||||
-rw-r--r-- | libhb/decavcodec.c | 19 | ||||
-rw-r--r-- | libhb/fifo.c | 101 | ||||
-rw-r--r-- | libhb/hb.c | 5 | ||||
-rw-r--r-- | libhb/internal.h | 4 | ||||
-rw-r--r-- | libhb/module.defs | 7 | ||||
-rw-r--r-- | libhb/oclnv12toyuv.c | 77 | ||||
-rw-r--r-- | libhb/oclnv12toyuv.h | 15 | ||||
-rw-r--r-- | libhb/oclscale.c | 167 | ||||
-rw-r--r-- | libhb/opencl.c | 383 | ||||
-rw-r--r-- | libhb/opencl.h | 80 | ||||
-rw-r--r-- | libhb/openclwrapper.c | 399 | ||||
-rw-r--r-- | libhb/openclwrapper.h | 11 | ||||
-rw-r--r-- | libhb/scan.c | 4 | ||||
-rw-r--r-- | libhb/stream.c | 17 | ||||
-rw-r--r-- | libhb/vadxva2.c | 14 | ||||
-rw-r--r-- | libhb/vadxva2.h | 19 | ||||
-rw-r--r-- | libhb/work.c | 28 |
19 files changed, 857 insertions, 679 deletions
diff --git a/libhb/common.h b/libhb/common.h index 6a657465c..b57103d06 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -21,9 +21,6 @@ #include <sys/stat.h> #include <dirent.h> -#ifdef USE_OPENCL -#include "extras/cl.h" -#endif /* * It seems WinXP doesn't align the stack of new threads to 16 bytes. * To prevent crashes in SSE functions, we need to force stack alignement @@ -537,10 +534,11 @@ struct hb_job_s uint32_t frames_to_skip; // decode but discard this many frames // initially (for frame accurate positioning // to non-I frames). - int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */ + int use_opencl; int use_hwd; int use_decomb; int use_detelecine; + #ifdef USE_QSV // QSV-specific settings struct @@ -920,10 +918,12 @@ struct hb_title_s uint32_t flags; // set if video stream doesn't have IDR frames - int opencl_support; - int hwd_support; #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 0) + + // whether OpenCL scaling is supported for this source + int opencl_support; + int hwd_support; // TODO: merge to video_decode_support }; // Update win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_state_s.cs when changing this struct @@ -1094,51 +1094,7 @@ extern hb_work_object_t hb_reader; #define HB_FILTER_DROP 3 #define HB_FILTER_DONE 4 -typedef struct hb_oclscale_s -{ -#ifdef USE_OPENCL - int initialized; - // bicubic scale weights - cl_mem bicubic_x_weights; - cl_mem bicubic_y_weights; - cl_float xscale; - cl_float yscale; - int width; - int height; - // horizontal scaling and vertical scaling kernel handle - cl_kernel m_kernel; - int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem -#endif -} hb_oclscale_t; - -#ifdef USE_OPENCL -int hb_ocl_scale( hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os ); -#endif - -#ifdef USE_OPENCL -int hb_use_dxva( hb_title_t * title ); -// create opencl buffer -#define CREATEBUF( out, flags, size )\ - {\ - out = clCreateBuffer( kenv->context, (flags), (size), NULL, &status );\ - if( CL_SUCCESS != status ) return -1;\ - } - -#define OCLCHECK( method, ... )\ - status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\ - hb_error("%s:%d (%s) error: %d\n", __FUNCTION__, __LINE__, #method, status); return status; } - -#define CL_FREE( buf )\ -{\ - if( buf )\ - {\ - { clReleaseMemObject( buf ); }\ - buf = NULL;\ - }\ -} - - -#endif +int hb_use_dxva(hb_title_t *title); typedef struct hb_filter_init_s { @@ -1152,10 +1108,7 @@ typedef struct hb_filter_init_s int vrate_base; int vrate; int cfr; -#ifdef USE_OPENCL int use_dxva; -#endif - } hb_filter_init_t; typedef struct hb_filter_info_s diff --git a/libhb/cropscale.c b/libhb/cropscale.c index 66aec7f27..f9227562a 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -10,7 +10,7 @@ #include "hb.h" #include "hbffmpeg.h" #include "common.h" - +#include "opencl.h" struct hb_filter_private_s { @@ -23,12 +23,12 @@ struct hb_filter_private_s int height_out; int crop[4]; -#ifdef USE_OPENCL + /* OpenCL/DXVA2 */ int use_dxva; int use_decomb; int use_detelecine; - hb_oclscale_t *os; //ocl scaler handler -#endif + hb_oclscale_t *os; //ocl scaler handler + struct SwsContext * context; }; @@ -69,17 +69,18 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, pv->height_in = init->height; pv->width_out = init->width - (init->crop[2] + init->crop[3]); pv->height_out = init->height - (init->crop[0] + init->crop[1]); -#ifdef USE_OPENCL - pv->use_dxva = init->use_dxva; - pv->use_decomb = init->job->use_decomb; + + /* OpenCL/DXVA2 */ + pv->use_dxva = init->use_dxva; + pv->use_decomb = init->job->use_decomb; pv->use_detelecine = init->job->use_detelecine; - if( pv->job->use_opencl && pv->job->title->opencl_support) + if (pv->job->use_opencl && pv->job->title->opencl_support) { pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); memset( pv->os, 0, sizeof( hb_oclscale_t ) ); } -#endif + memcpy( pv->crop, init->crop, sizeof( int[4] ) ); if( filter->settings ) { @@ -93,9 +94,6 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, init->width = pv->width_out; init->height = pv->height_out; memcpy( init->crop, pv->crop, sizeof( int[4] ) ); -#ifdef USE_OPENCL - pv->use_dxva = init->use_dxva; -#endif return 0; } @@ -136,15 +134,18 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) { return; } -#ifdef USE_OPENCL - if( pv->job->use_opencl && pv->job->title->opencl_support && pv->os ) + /* OpenCL */ + if (pv->job->use_opencl && pv->job->title->opencl_support && pv->os) { - CL_FREE( pv->os->bicubic_x_weights ); - CL_FREE( pv->os->bicubic_y_weights ); - free( pv->os ); + if (hb_ocl != NULL) + { + HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_x_weights); + HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_y_weights); + } + free(pv->os); } -#endif + if( pv->context ) { sws_freeContext( pv->context ); @@ -154,7 +155,7 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) filter->private_data = NULL; } -#ifdef USE_OPENCL +/* OpenCL */ static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) { if( dstride == sstride ) @@ -171,7 +172,6 @@ static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride } return dst; } -#endif static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) { @@ -189,45 +189,44 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) av_picture_crop( &pic_crop, &pic_in, in->f.fmt, pv->crop[0], pv->crop[2] ); -#ifdef USE_OPENCL // Use bicubic OpenCL scaling when selected and when downsampling < 4:1; - if ((pv->job->use_opencl && pv->job->title->opencl_support) && (pv->width_out * 4 > pv->width_in) && (in->cl.buffer != NULL) && (out->cl.buffer != NULL)) + if ((pv->job->use_opencl && pv->job->title->opencl_support) && + (pv->width_out * 4 > pv->width_in) && + (in->cl.buffer != NULL) && (out->cl.buffer != NULL)) { + /* OpenCL */ hb_ocl_scale(in, out, pv->crop, pv->os); } else { -#endif - if ( !pv->context || - pv->width_in != in->f.width || - pv->height_in != in->f.height || - pv->pix_fmt != in->f.fmt ) - { - // Something changed, need a new scaling context. - if( pv->context ) - sws_freeContext( pv->context ); - - pv->context = hb_sws_get_context( - in->f.width - (pv->crop[2] + pv->crop[3]), - in->f.height - (pv->crop[0] + pv->crop[1]), - in->f.fmt, - out->f.width, out->f.height, out->f.fmt, - SWS_LANCZOS | SWS_ACCURATE_RND ); - pv->width_in = in->f.width; - pv->height_in = in->f.height; - pv->pix_fmt = in->f.fmt; + if (pv->context == NULL || + pv->width_in != in->f.width || + pv->height_in != in->f.height || + pv->pix_fmt != in->f.fmt) + { + // Something changed, need a new scaling context. + if (pv->context != NULL) + { + sws_freeContext(pv->context); + } + + pv->context = hb_sws_get_context(in->f.width - (pv->crop[2] + pv->crop[3]), + in->f.height - (pv->crop[0] + pv->crop[1]), + in->f.fmt, out->f.width, out->f.height, + out->f.fmt, SWS_LANCZOS|SWS_ACCURATE_RND); + pv->width_in = in->f.width; + pv->height_in = in->f.height; + pv->pix_fmt = in->f.fmt; + } + + // Scale pic_crop into pic_render according to the + // context set up above + sws_scale(pv->context, + (const uint8_t* const*)pic_crop.data, pic_crop.linesize, + 0, in->f.height - (pv->crop[0] + pv->crop[1]), + pic_out.data, pic_out.linesize); } - // Scale pic_crop into pic_render according to the - // context set up above - sws_scale(pv->context, - (const uint8_t* const*)pic_crop.data, - pic_crop.linesize, - 0, in->f.height - (pv->crop[0] + pv->crop[1]), - pic_out.data, pic_out.linesize); -#ifdef USE_OPENCL - } -#endif out->s = in->s; hb_buffer_move_subs( out, in ); return out; @@ -261,27 +260,19 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); } -#ifdef USE_OPENCL - if ( (in->f.fmt == pv->pix_fmt_out && - !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && - in->f.width == pv->width_out && in->f.height == pv->height_out) && - (pv->use_decomb == 0) && (pv->use_detelecine == 0) || - (pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out) ) - { - *buf_out = in; - *buf_in = NULL; - return HB_FILTER_OK; - } -#else - if ( in->f.fmt == pv->pix_fmt_out && + + /* OpenCL/DXVA2 */ + if ((!pv->use_decomb && !pv->use_detelecine && !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && - in->f.width == pv->width_out && in->f.height == pv->height_out ) + in->f.fmt == pv->pix_fmt_out && in->f.width == pv->width_out && + in->f.height == pv->height_out) || (pv->use_dxva && + in->f.width == pv->width_out && + in->f.height == pv->height_out)) { *buf_out = in; - *buf_in = NULL; + *buf_in = NULL; return HB_FILTER_OK; } -#endif *buf_out = crop_scale( pv, in ); diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c index 0ade0da27..c961bd719 100644 --- a/libhb/decavcodec.c +++ b/libhb/decavcodec.c @@ -43,6 +43,7 @@ #include "audio_resample.h" #ifdef USE_HWD +#include "opencl.h" #include "vadxva2.h" #endif @@ -393,19 +394,19 @@ static void closePrivData( hb_work_private_t ** ppv ) hb_audio_resample_free(pv->resample); #ifdef USE_HWD - if ( pv->opencl_scale ) + if (pv->opencl_scale != NULL) { - free( pv->opencl_scale ); + free(pv->opencl_scale); } - - if ( pv->dxva2 ) + if (pv->dxva2 != NULL) { -#ifdef USE_OPENCL - CL_FREE( pv->dxva2->cl_mem_nv12 ); -#endif - hb_va_close( pv->dxva2 ); + if (hb_ocl != NULL) + { + HB_OCL_BUF_FREE(hb_ocl, pv->dxva2->cl_mem_nv12); + } + hb_va_close(pv->dxva2); } -#endif +#endif #ifdef USE_QSV_PTS_WORKAROUND if (pv->qsv.decode && pv->qsv.pts_list != NULL) diff --git a/libhb/fifo.c b/libhb/fifo.c index fbfcefd02..6887e43c7 100644 --- a/libhb/fifo.c +++ b/libhb/fifo.c @@ -250,14 +250,20 @@ void hb_buffer_pool_free( void ) if( b->data ) { freed += b->alloc; -#ifdef USE_OPENCL - if (b->cl.buffer != NULL) { + + if (b->cl.buffer != NULL) + { + /* OpenCL */ if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0) - hb_log("bad free: %.16x -> buffer %.16x map %.16x", b, b->cl.buffer, b->data); + { + hb_log("hb_buffer_pool_free: bad free: %.16x -> buffer %.16x map %.16x", + b, b->cl.buffer, b->data); + } } else -#endif - free( b->data ); + { + free(b->data); + } } free( b ); count++; @@ -303,17 +309,18 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) { b = hb_fifo_get( buffer_pool ); -#ifdef USE_OPENCL - if (b && (needsMapped != 0) && (b->cl.buffer == NULL)) + /* OpenCL */ + if (b != NULL && needsMapped && b->cl.buffer == NULL) { // We need a mapped OpenCL buffer and that is not what we got out of the pool. - // Ditch it. It will get replaced with what we need. - if (b->data) + // Ditch it; it will get replaced with what we need. + if (b->data != NULL) + { free(b->data); + } free(b); b = NULL; } -#endif if( b ) { @@ -322,11 +329,11 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) * didn't have to do this. */ uint8_t *data = b->data; -#ifdef USE_OPENCL - cl_mem buffer = b->cl.buffer; + + /* OpenCL */ + cl_mem buffer = b->cl.buffer; cl_event last_event = b->cl.last_event; - int loc = b->cl.buffer_location; -#endif + int loc = b->cl.buffer_location; memset( b, 0, sizeof(hb_buffer_t) ); b->alloc = buffer_pool->buffer_size; @@ -335,11 +342,12 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) b->s.start = -1; b->s.stop = -1; b->s.renderOffset = -1; -#ifdef USE_OPENCL - b->cl.buffer = buffer; - b->cl.last_event = last_event; + + /* OpenCL */ + b->cl.buffer = buffer; + b->cl.last_event = last_event; b->cl.buffer_location = loc; -#endif + return( b ); } } @@ -358,19 +366,18 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) if (size) { -#ifdef USE_OPENCL - b->cl.last_event = NULL; + /* OpenCL */ + b->cl.last_event = NULL; b->cl.buffer_location = HOST; - if (needsMapped != 0) + /* OpenCL */ + if (needsMapped) { - int status; - status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc); - //hb_log("buf: %.16x -> buffer %.16x map %.16x size %d", b, b->cl.buffer, b->data, size); + int status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc); } - else { + else + { b->cl.buffer = NULL; -#endif #if defined( SYS_DARWIN ) || defined( SYS_FREEBSD ) || defined( SYS_MINGW ) b->data = malloc( b->alloc ); @@ -380,9 +387,7 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) #else b->data = memalign( 16, b->alloc ); #endif -#ifdef USE_OPENCL } -#endif if( !b->data ) { @@ -533,11 +538,10 @@ hb_buffer_t * hb_frame_buffer_init( int pix_fmt, int width, int height ) hb_image_height_stride( pix_fmt, height, p ); } } -#ifdef USE_OPENCL - buf = hb_buffer_init_internal( size , hb_use_buffers() ); -#else - buf = hb_buffer_init( size ); -#endif + + /* OpenCL */ + buf = hb_buffer_init_internal(size , hb_use_buffers()); + if( buf == NULL ) return NULL; @@ -590,22 +594,22 @@ void hb_buffer_swap_copy( hb_buffer_t *src, hb_buffer_t *dst ) uint8_t *data = dst->data; int size = dst->size; int alloc = dst->alloc; -#ifdef USE_OPENCL - cl_mem buffer = dst->cl.buffer; + + /* OpenCL */ + cl_mem buffer = dst->cl.buffer; cl_event last_event = dst->cl.last_event; - int loc = dst->cl.buffer_location; -#endif + int loc = dst->cl.buffer_location; *dst = *src; src->data = data; src->size = size; src->alloc = alloc; -#ifdef USE_OPENCL - src->cl.buffer = buffer; - src->cl.last_event = last_event; + + /* OpenCL */ + src->cl.buffer = buffer; + src->cl.last_event = last_event; src->cl.buffer_location = loc; -#endif } // Frees the specified buffer list. @@ -633,14 +637,19 @@ void hb_buffer_close( hb_buffer_t ** _b ) // free the buf if( b->data ) { -#ifdef USE_OPENCL - if (b->cl.buffer != NULL) { + if (b->cl.buffer != NULL) + { + /* OpenCL */ if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0) - hb_log("bad free2: %.16x -> buffer %.16x map %.16x", b, b->cl.buffer, b->data); + { + hb_log("hb_buffer_pool_free: bad free %.16x -> buffer %.16x map %.16x", + b, b->cl.buffer, b->data); } + } else -#endif - free( b->data ); + { + free(b->data); + } hb_lock(buffers.lock); buffers.allocated -= b->alloc; hb_unlock(buffers.lock); diff --git a/libhb/hb.c b/libhb/hb.c index 857ffa1d2..73356595e 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -441,11 +441,6 @@ hb_handle_t * hb_init( int verbose, int update_check ) h->interjob = calloc( sizeof( hb_interjob_t ), 1 ); - /* opencl */ -#ifdef USE_OPENCL - //hb_opencl_init(); // FIXME: Ensure gui instances call this or hb_get_opencl_env() during startup if needed. -#endif - /* Start library thread */ hb_log( "hb_init: starting libhb thread" ); h->die = 0; diff --git a/libhb/internal.h b/libhb/internal.h index e4d5ed71c..3920cf025 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -8,6 +8,7 @@ */ #include "hbffmpeg.h" +#include "extras/cl.h" /*********************************************************************** * common.c @@ -121,14 +122,13 @@ struct hb_buffer_s void *filter_details; } qsv_details; -#ifdef USE_OPENCL + /* OpenCL */ struct cl_data { cl_mem buffer; cl_event last_event; enum { HOST, DEVICE } buffer_location; } cl; -#endif // PICTURESUB subtitle packets: diff --git a/libhb/module.defs b/libhb/module.defs index 297a1b4ed..3102beafa 100644 --- a/libhb/module.defs +++ b/libhb/module.defs @@ -41,9 +41,6 @@ endif ifeq (1,$(FEATURE.fdk_aac)) LIBHB.GCC.D += USE_FDK_AAC endif -ifeq (1,$(FEATURE.opencl)) -LIBHB.GCC.D += USE_OPENCL -endif ifeq (1,$(FEATURE.hwd)) LIBHB.GCC.D += USE_HWD @@ -67,10 +64,6 @@ endif LIBHB.GCC.D += __LIBHB__ USE_PTHREAD LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include -ifeq (1,$(FEATURE.opencl)) -LIBHB.GCC.l += OpenCL -endif - ifeq ($(BUILD.system),cygwin) LIBHB.GCC.D += SYS_CYGWIN else ifeq ($(BUILD.system),darwin) diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c index 19188e5a3..0f4ee337c 100644 --- a/libhb/oclnv12toyuv.c +++ b/libhb/oclnv12toyuv.c @@ -10,8 +10,9 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ -#ifdef USE_OPENCL #ifdef USE_HWD + +#include "opencl.h" #include "vadxva2.h" #include "oclnv12toyuv.h" @@ -50,10 +51,16 @@ static int hb_nv12toyuv_reg_kernel( void ); */ static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) { + if (hb_ocl == NULL) + { + hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available"); + return 1; + } + cl_int status = CL_SUCCESS; int in_bytes = w*h*3/2; - CREATEBUF( dxva2->cl_mem_nv12, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, in_bytes ); - CREATEBUF( dxva2->cl_mem_yuv, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, in_bytes ); + HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_nv12, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, in_bytes); + HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_yuv, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, in_bytes); return 0; } @@ -62,8 +69,14 @@ static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva */ static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ) { + if (hb_ocl == NULL) + { + hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available"); + return 1; + } + int ret; - dxva2->nv12toyuv = clCreateKernel( kenv->program, "nv12toyuv", &ret ); + dxva2->nv12toyuv = hb_ocl->clCreateKernel(kenv->program, "nv12toyuv", &ret); return ret; } @@ -74,10 +87,17 @@ static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2 { int arg = 0, status; kenv->kernel = dxva2->nv12toyuv; - OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12 ); - OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv ); - OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w ); - OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h ); + + if (hb_ocl == NULL) + { + hb_error("hb_nv12toyuv_setkernelarg: OpenCL support not available"); + return 1; + } + + HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h); return 0; } @@ -163,10 +183,19 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) return -1; } + if (hb_ocl == NULL) + { + hb_error("hb_nv12toyuv: OpenCL support not available"); + return -1; + } + int in_bytes = w*h*3/2; if( kenv->isAMD ) { - void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_nv12, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL ); + void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, + dxva2->cl_mem_nv12, + CL_MAP_WRITE_INVALIDATE_REGION, + CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL); for ( i = 0; i < dxva2->height; i++ ) { @@ -176,7 +205,8 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) memcpy( data + ( dxva2->width * dxva2->height ) + i * dxva2->width, bufi2 + i * p, dxva2->width ); } } - clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_nv12, data, 0, NULL, NULL ); + hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, dxva2->cl_mem_nv12, + data, 0, NULL, NULL); } else { @@ -189,18 +219,22 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) memcpy( tmp + (dxva2->width * dxva2->height) + i * dxva2->width, bufi2 + i * p, dxva2->width ); } } - OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL ); + HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, + dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL); free( tmp ); } size_t gdim[2] = {w>>1, h>>1}; - OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL ); + HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue, + kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL ); if( (crop[0] || crop[1] || crop[2] || crop[3]) && (decomb == 0) && (detelecine == 0) ) { AVPicture pic_in; AVPicture pic_crop; - clEnqueueReadBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out, 0, NULL, NULL ); + hb_ocl->clEnqueueReadBuffer(kenv->command_queue, dxva2->cl_mem_yuv, + CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out, + 0, NULL, NULL); hb_buffer_t *in = hb_video_buffer_init( w, h ); int wmp = in->plane[0].stride; @@ -226,13 +260,20 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) if( kenv->isAMD ) { - void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, ww * hh * 3 / 2, 0, NULL, NULL, NULL ); + void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, + dxva2->cl_mem_yuv, + CL_MAP_WRITE_INVALIDATE_REGION, + CL_TRUE, 0, ww * hh * 3 / 2, 0, + NULL, NULL, NULL); memcpy( data, dxva2->nv12toyuv_tmp_in, ww * hh * 3 / 2 ); - clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_yuv, data, 0, NULL, NULL ); + hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, + dxva2->cl_mem_yuv, data, 0, NULL, NULL); } else { - OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_in, 0, NULL, NULL ); + HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, + dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, + dxva2->nv12toyuv_tmp_in, 0, NULL, NULL); } hb_buffer_close( &in ); @@ -281,5 +322,5 @@ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxv } return 0; } -#endif -#endif + +#endif // USE_HWD diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h index 4fa596662..de9282bfa 100644 --- a/libhb/oclnv12toyuv.h +++ b/libhb/oclnv12toyuv.h @@ -11,20 +11,17 @@ */ -#ifdef USE_OPENCL -#ifndef RENDER_CL_H -#define RENDER_CL_H +#ifndef HB_OCLNV12TOYUV_H +#define HB_OCLNV12TOYUV_H #include "common.h" #include "extras/cl.h" #include "openclwrapper.h" -/** +/* * nv12 to yuv interface * bufi is input frame of nv12, w is input frame width, h is input frame height */ -#ifdef USE_HWD -int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2, int decomb, int detelecine ); -#endif -#endif -#endif +int hb_ocl_nv12toyuv(uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2, int decomb, int detelecine); + +#endif // HB_OCLNV12TOYUV_H diff --git a/libhb/oclscale.c b/libhb/oclscale.c index da05f371e..eb59eaa66 100644 --- a/libhb/oclscale.c +++ b/libhb/oclscale.c @@ -1,20 +1,19 @@ -/* oclscale.c
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
- */
-
-#ifdef USE_OPENCL
-
+/* oclscale.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: <http://handbrake.fr/>. + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> + Li Cao <[email protected]> <http://www.multicorewareinc.com/> + + */ + #include <math.h> #include "common.h" +#include "opencl.h" #include "openclwrapper.h" #define FILTER_LEN 4 @@ -72,14 +71,20 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) int crop_bottom = data[3]; int crop_left = data[4]; int crop_right = data[5]; - cl_int in_frame_w = (int)data[6];
- cl_int in_frame_h = (int)data[7];
- cl_int out_frame_w = (int)data[8];
+ cl_int in_frame_w = (int)data[6]; + cl_int in_frame_h = (int)data[7]; + cl_int out_frame_w = (int)data[8]; cl_int out_frame_h = (int)data[9]; hb_oclscale_t *os = data[10]; hb_buffer_t *in = data[11]; hb_buffer_t *out = data[12]; + if (hb_ocl == NULL) + { + hb_error("hb_ocl_scale_func: OpenCL support not available"); + return 0; + } + if (os->initialized == 0) { hb_log( "Scaling With OpenCL" ); @@ -87,7 +92,7 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) hb_log( "Using Zero Copy"); // create the block kernel cl_int status; - os->m_kernel = clCreateKernel( kenv->program, "frame_scale", &status ); + os->m_kernel = hb_ocl->clCreateKernel(kenv->program, "frame_scale", &status); os->initialized = 1; } @@ -98,8 +103,12 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) int eventCount = 0; if (kenv->isAMD == 0) { - status = clEnqueueUnmapMemObject(kenv->command_queue, in->cl.buffer, in->data, 0, NULL, &events[eventCount++]); - status = clEnqueueUnmapMemObject(kenv->command_queue, out->cl.buffer, out->data, 0, NULL, &events[eventCount++]); + status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, + in->cl.buffer, in->data, 0, + NULL, &events[eventCount++]); + status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, + out->cl.buffer, out->data, 0, + NULL, &events[eventCount++]); } cl_int srcPlaneOffset0 = in->plane[0].data - in->data; @@ -127,28 +136,28 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) cl_float yscale = (out_frame_h * 1.0f) / in_frame_h; setupScaleWeights(xscale, yscale, out_frame_w, out_frame_h, os, kenv); - OCLCHECK( clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf ); - OCLCHECK( clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf ); - OCLCHECK( clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale ); - OCLCHECK( clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale ); - OCLCHECK( clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w );
- OCLCHECK( clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h );
- OCLCHECK( clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w );
- OCLCHECK( clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h ); - OCLCHECK( clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights ); - OCLCHECK( clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights ); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights); size_t workOffset[] = { 0, 0, 0 }; size_t globalWorkSize[] = { 1, 1, 1 }; @@ -164,42 +173,70 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) globalWorkSize[1] = ygroups; globalWorkSize[2] = 3; - OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, eventCount, (eventCount == 0) ? NULL : &events[0], &events[eventCount] ); + HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue, + os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, + eventCount, eventCount == 0 ? NULL : &events[0], &events[eventCount]); ++eventCount; if (kenv->isAMD == 0) { - in->data = clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, in->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount], &status); - out->data = clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, out->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount + 1], &status); + in->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, + CL_FALSE, CL_MAP_READ|CL_MAP_WRITE, + 0, in->alloc, + eventCount ? 1 : 0, + eventCount ? &events[eventCount - 1] : NULL, + &events[eventCount], &status); + out->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, + CL_FALSE, CL_MAP_READ|CL_MAP_WRITE, + 0, out->alloc, + eventCount ? 1 : 0, + eventCount ? &events[eventCount - 1] : NULL, + &events[eventCount + 1], &status); eventCount += 2; } - clFlush(kenv->command_queue); - clWaitForEvents(eventCount, &events[0]); + hb_ocl->clFlush(kenv->command_queue); + hb_ocl->clWaitForEvents(eventCount, &events[0]); int i; for (i = 0; i < eventCount; ++i) - clReleaseEvent(events[i]); + { + hb_ocl->clReleaseEvent(events[i]); + } } return 1; } -int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) { +int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) +{ cl_int status; - if (os->xscale != xscale || os->width < width) { + + if (hb_ocl == NULL) + { + hb_error("setupScaleWeights: OpenCL support not available"); + return 1; + } + + if (os->xscale != xscale || os->width < width) + { cl_float *xweights = hb_bicubic_weights(xscale, width); - CL_FREE(os->bicubic_x_weights); - CREATEBUF(os->bicubic_x_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * width * 4); - OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL ); + HB_OCL_BUF_FREE (hb_ocl, os->bicubic_x_weights); + HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_x_weights, CL_MEM_READ_ONLY, + sizeof(cl_float) * width * 4); + HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, + CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL); os->width = width; os->xscale = xscale; free(xweights); } - if ((os->yscale != yscale) || (os->height < height)) { + if ((os->yscale != yscale) || (os->height < height)) + { cl_float *yweights = hb_bicubic_weights(yscale, height); - CL_FREE(os->bicubic_y_weights); - CREATEBUF(os->bicubic_y_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * height * 4); - OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL ); + HB_OCL_BUF_FREE (hb_ocl, os->bicubic_y_weights); + HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_y_weights, CL_MEM_READ_ONLY, + sizeof(cl_float) * height * 4); + HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, + CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL); os->height = height; os->yscale = yscale; free(yweights); @@ -211,10 +248,10 @@ int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, h /** * function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm * parameter: -* inputFrameBuffer: the source video frame opencl buffer
-* outputdata: the destination video frame buffer
-* inputWidth: the width of the source video frame
-* inputHeight: the height of the source video frame
+* inputFrameBuffer: the source video frame opencl buffer +* outputdata: the destination video frame buffer +* inputWidth: the width of the source video frame +* inputHeight: the height of the source video frame * outputWidth: the width of destination video frame * outputHeight: the height of destination video frame */ @@ -263,9 +300,3 @@ int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os hb_log( "run kernel[%s] failed", "frame_scale" ); return 0; } - - - - - -#endif diff --git a/libhb/opencl.c b/libhb/opencl.c index cfc5e5747..8d846a824 100644 --- a/libhb/opencl.c +++ b/libhb/opencl.c @@ -26,10 +26,31 @@ #include "common.h" #include "opencl.h" -int hb_opencl_library_open(hb_opencl_library_t *opencl) +hb_opencl_library_t *hb_ocl = NULL; + +int hb_ocl_init() +{ + if (hb_ocl == NULL) + { + if ((hb_ocl = hb_opencl_library_init()) == NULL) + { + return -1; + } + } + return 0; +} + +void hb_ocl_close() +{ + hb_opencl_library_close(&hb_ocl); +} + +hb_opencl_library_t* hb_opencl_library_init() { - if (opencl == NULL) + hb_opencl_library_t *opencl; + if ((opencl = calloc(1, sizeof(hb_opencl_library_t))) == NULL) { + hb_error("hb_opencl_library_init: memory allocation failure"); goto fail; } @@ -39,13 +60,13 @@ int hb_opencl_library_open(hb_opencl_library_t *opencl) goto fail; } -#define HB_OCL_LOAD(func) \ -{ \ - if ((opencl->func = (void*)HB_OCL_DLSYM(opencl->library, #func)) == NULL) \ - { \ - hb_log("hb_opencl_library_open: failed to load function '%s'", #func); \ - goto fail; \ - } \ +#define HB_OCL_LOAD(func) \ +{ \ + if ((opencl->func = (void*)HB_OCL_DLSYM(opencl->library, #func)) == NULL) \ + { \ + hb_log("hb_opencl_library_init: failed to load function '%s'", #func); \ + goto fail; \ + } \ } HB_OCL_LOAD(clBuildProgram); HB_OCL_LOAD(clCreateBuffer); @@ -73,18 +94,27 @@ int hb_opencl_library_open(hb_opencl_library_t *opencl) HB_OCL_LOAD(clReleaseContext); HB_OCL_LOAD(clReleaseEvent); HB_OCL_LOAD(clReleaseKernel); + HB_OCL_LOAD(clReleaseMemObject); HB_OCL_LOAD(clReleaseProgram); HB_OCL_LOAD(clSetKernelArg); HB_OCL_LOAD(clWaitForEvents); - return 0; + + //success + return opencl; fail: - hb_opencl_library_close(opencl); - return -1; + hb_opencl_library_close(&opencl); + return NULL; } -void hb_opencl_library_close(hb_opencl_library_t *opencl) +void hb_opencl_library_close(hb_opencl_library_t **_opencl) { + if (_opencl == NULL) + { + return; + } + hb_opencl_library_t *opencl = *_opencl; + if (opencl != NULL) { if (opencl->library != NULL) @@ -120,114 +150,216 @@ void hb_opencl_library_close(hb_opencl_library_t *opencl) HB_OCL_UNLOAD(clReleaseContext); HB_OCL_UNLOAD(clReleaseEvent); HB_OCL_UNLOAD(clReleaseKernel); + HB_OCL_UNLOAD(clReleaseMemObject); HB_OCL_UNLOAD(clReleaseProgram); HB_OCL_UNLOAD(clSetKernelArg); HB_OCL_UNLOAD(clWaitForEvents); } + *_opencl = NULL; } -static int hb_opencl_device_is_supported(cl_device_type type, - const char *vendor, - const char *version) +static int hb_opencl_device_is_supported(hb_opencl_device_t* device) { - int major, minor; + // we only support OpenCL on GPUs for now + // FIXME: disable on NVIDIA to to a bug + if ((device != NULL) && + (device->type & CL_DEVICE_TYPE_GPU) && + (device->ocl_vendor != HB_OCL_VENDOR_NVIDIA)) + { + int major, minor; + // check OpenCL version: + // OpenCL<space><major_version.minor_version><space><vendor-specific information> + if (sscanf(device->version, "OpenCL %d.%d", &major, &minor) != 2) + { + return 0; + } + return (major > HB_OCL_MINVERSION_MAJOR) || (major == HB_OCL_MINVERSION_MAJOR && + minor >= HB_OCL_MINVERSION_MINOR); + } + return 0; +} + +static hb_opencl_device_t* hb_opencl_device_get(hb_opencl_library_t *opencl, + cl_device_id device_id) +{ + if (opencl == NULL || opencl->clGetDeviceInfo == NULL) + { + hb_error("hb_opencl_device_get: OpenCL support not available"); + return NULL; + } + else if (device_id == NULL) + { + hb_error("hb_opencl_device_get: invalid device ID"); + return NULL; + } + + hb_opencl_device_t *device = calloc(1, sizeof(hb_opencl_device_t)); + if (device == NULL) + { + hb_error("hb_opencl_device_get: memory allocation failure"); + return NULL; + } + + cl_int status = CL_SUCCESS; + device->id = device_id; - // we only support OpenCL on GPUs - // disable on NVIDIA to to a bug (FIXME) - if (!(type & CL_DEVICE_TYPE_GPU) || - !(strncmp(vendor, "NVIDIA", 6 /* strlen("NVIDIA") */))) + status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VENDOR, sizeof(device->vendor), + device->vendor, NULL); + status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_NAME, sizeof(device->name), + device->name, NULL); + status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VERSION, sizeof(device->version), + device->version, NULL); + status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_TYPE, sizeof(device->type), + &device->type, NULL); + status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_PLATFORM, sizeof(device->platform), + &device->platform, NULL); + status |= opencl->clGetDeviceInfo(device->id, CL_DRIVER_VERSION, sizeof(device->driver), + device->driver, NULL); + if (status != CL_SUCCESS) { - return 0; + free(device); + return NULL; } - // check OpenCL version; format: - // OpenCL<space><major_version.minor_version><space><vendor-specific information> - if (sscanf(version, "OpenCL %d.%d", &major, &minor) != 2) + if (!strcmp(device->vendor, "Advanced Micro Devices, Inc.") || + !strcmp(device->vendor, "AMD")) + { + device->ocl_vendor = HB_OCL_VENDOR_AMD; + } + else if (!strncmp(device->vendor, "NVIDIA", 6 /* strlen("NVIDIA") */)) { - return 0; + device->ocl_vendor = HB_OCL_VENDOR_NVIDIA; + } + else + { + device->ocl_vendor = HB_OCL_VENDOR_OTHER; } - return (major > HB_OCL_MINVERSION_MAJOR) || (major == HB_OCL_MINVERSION_MAJOR && - minor >= HB_OCL_MINVERSION_MINOR); + return device; } -int hb_opencl_available() +static void hb_opencl_devices_list_close(hb_list_t **_list) { - static int opencl_available = -1; - if (opencl_available >= 0) + if (_list != NULL) { - return opencl_available; + hb_list_t *list = *_list; + hb_opencl_device_t *device; + while (list != NULL && hb_list_count(list) > 0) + { + if ((device = hb_list_item(list, 0)) != NULL) + { + hb_list_rem(list, device); + free(device); + } + } } - opencl_available = 0; + hb_list_close(_list); +} - cl_device_type type; - char vendor[100], version[100]; - cl_device_id *device_ids = NULL; - cl_platform_id *platform_ids = NULL; - hb_opencl_library_t lib, *opencl = &lib; +static hb_list_t* hb_opencl_devices_list_get(hb_opencl_library_t *opencl, + cl_device_type device_type) +{ + if (opencl == NULL || + opencl->library == NULL || + opencl->clGetDeviceIDs == NULL || + opencl->clGetDeviceInfo == NULL || + opencl->clGetPlatformIDs == NULL) + { + hb_error("hb_opencl_devices_list_get: OpenCL support not available"); + return NULL; + } + + hb_list_t *list = hb_list_init(); + if (list == NULL) + { + hb_error("hb_opencl_devices_list_get: memory allocation failure"); + return NULL; + } + + cl_device_id *device_ids; + hb_opencl_device_t *device; + cl_platform_id *platform_ids; cl_uint i, j, num_platforms, num_devices; - /* - * Check whether we can load the OpenCL library, then check devices and make - * sure we support running OpenCL code on at least one of them. - */ - if (hb_opencl_library_open(opencl) == 0) + if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms) + { + goto fail; + } + if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL) + { + hb_error("hb_opencl_devices_list_get: memory allocation failure"); + goto fail; + } + if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS) + { + goto fail; + } + for (i = 0; i < num_platforms; i++) { - if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms) + if (opencl->clGetDeviceIDs(platform_ids[i], device_type, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices) { - goto end; + // non-fatal + continue; } - if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL) + if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL) { - goto end; + hb_error("hb_opencl_devices_list_get: memory allocation failure"); + goto fail; } - if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS) + if (opencl->clGetDeviceIDs(platform_ids[i], device_type, num_devices, device_ids, NULL) != CL_SUCCESS) { - goto end; + // non-fatal + continue; } - for (i = 0; i < num_platforms; i++) + for (j = 0; j < num_devices; j++) { - if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices) - { - goto end; - } - if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL) - { - goto end; - } - if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, num_devices, device_ids, NULL) != CL_SUCCESS) + if ((device = hb_opencl_device_get(opencl, device_ids[j])) != NULL) { - goto end; + hb_list_add(list, device); } - for (j = 0; j < num_devices; j++) + } + } + return list; + +fail: + hb_opencl_devices_list_close(&list); + return NULL; +} + +int hb_opencl_available() +{ + static int opencl_available = -1; + if (opencl_available >= 0) + { + return opencl_available; + } + opencl_available = 0; + + /* + * Check whether we can load the OpenCL library, then check devices and make + * sure we support running OpenCL code on at least one of them. + */ + hb_opencl_library_t *opencl; + if ((opencl = hb_opencl_library_init()) != NULL) + { + int i; + hb_list_t *device_list; + hb_opencl_device_t *device; + if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL) + { + for (i = 0; i < hb_list_count(device_list); i++) { - if (device_ids[j] != NULL) + if ((device = hb_list_item(device_list, i)) != NULL && + (hb_opencl_device_is_supported(device))) { - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VENDOR, sizeof(vendor), - vendor, NULL); - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VERSION, sizeof(version), - version, NULL); - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_TYPE, sizeof(type), - &type, NULL); - - if (hb_opencl_device_is_supported(type, - (const char*)vendor, - (const char*)version)) - { - opencl_available = 1; - goto end; - } + opencl_available = 1; + break; } } - free(device_ids); - device_ids = NULL; + hb_opencl_devices_list_close(&device_list); } + hb_opencl_library_close(&opencl); } - -end: - free(device_ids); - free(platform_ids); - hb_opencl_library_close(opencl); return opencl_available; } @@ -238,7 +370,7 @@ void hb_opencl_info_print() * Its only purpose is to list OpenCL-capable devices, so let's initialize * only what we absolutely need here, rather than calling library_open(). */ - hb_opencl_library_t lib, *opencl = &lib; + hb_opencl_library_t ocl, *opencl = &ocl; if ((opencl->library = (void*)HB_OCL_DLOPEN) == NULL || (opencl->clGetDeviceIDs = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceIDs" )) == NULL || (opencl->clGetDeviceInfo = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceInfo" )) == NULL || @@ -249,76 +381,35 @@ void hb_opencl_info_print() goto end; } - cl_device_type type; - cl_device_id *device_ids; - cl_platform_id *platform_ids; - cl_uint i, j, k, num_platforms, num_devices; - char vendor[100], name[1024], version[100], driver[1024]; - - if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms) - { - goto end; - } - if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL) + int i, idx; + hb_list_t *device_list; + hb_opencl_device_t *device; + if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL) { - goto end; - } - if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS) - { - goto end; - } - for (i = 0, k = 1; i < num_platforms; i++) - { - if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices) - { - goto end; - } - if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL) - { - goto end; - } - if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, num_devices, device_ids, NULL) != CL_SUCCESS) - { - goto end; - } - for (j = 0; j < num_devices; j++) + for (i = 0, idx = 1; i < hb_list_count(device_list); i++) { - if (device_ids[j] != NULL) + if ((device = hb_list_item(device_list, i)) != NULL) { - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VENDOR, sizeof(vendor), - vendor, NULL); - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_NAME, sizeof(name), - name, NULL); - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VERSION, sizeof(version), - version, NULL); - opencl->clGetDeviceInfo(device_ids[j], CL_DRIVER_VERSION, sizeof(driver), - driver, NULL); - opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_TYPE, sizeof(type), - &type, NULL); - - // don't list unsupported devices - if (type & CL_DEVICE_TYPE_CPU) + // don't list CPU devices (always unsupported) + if (!(device->type & CL_DEVICE_TYPE_CPU)) { - continue; + hb_log("OpenCL device #%d: %s %s", idx++, device->vendor, device->name); + hb_log(" - OpenCL version: %s", device->version + 7 /* strlen("OpenCL ") */); + hb_log(" - driver version: %s", device->driver); + hb_log(" - device type: %s%s", + device->type & CL_DEVICE_TYPE_CPU ? "CPU" : + device->type & CL_DEVICE_TYPE_GPU ? "GPU" : + device->type & CL_DEVICE_TYPE_CUSTOM ? "Custom" : + device->type & CL_DEVICE_TYPE_ACCELERATOR ? "Accelerator" : "Unknown", + device->type & CL_DEVICE_TYPE_DEFAULT ? " (default)" : ""); + hb_log(" - supported: %s", + hb_opencl_device_is_supported(device) ? "YES" : "no"); } - hb_log("OpenCL device #%d: %s %s", k++, vendor, name); - hb_log(" - OpenCL version: %s", version + 7 /* strlen("OpenCL ") */); - hb_log(" - driver version: %s", driver); - hb_log(" - device type: %s%s", - type & CL_DEVICE_TYPE_CPU ? "CPU" : - type & CL_DEVICE_TYPE_GPU ? "GPU" : - type & CL_DEVICE_TYPE_CUSTOM ? "Custom" : - type & CL_DEVICE_TYPE_ACCELERATOR ? "Accelerator" : "Unknown", - type & CL_DEVICE_TYPE_DEFAULT ? " (default)" : ""); - hb_log(" - supported: %s", - hb_opencl_device_is_supported(type, - (const char*)vendor, - (const char*)version) ? "yes" : "no"); } } - free(device_ids); + hb_opencl_devices_list_close(&device_list); } end: - hb_opencl_library_close(opencl); + hb_opencl_library_close(&opencl); } diff --git a/libhb/opencl.h b/libhb/opencl.h index 93171ebd0..2791e1618 100644 --- a/libhb/opencl.h +++ b/libhb/opencl.h @@ -11,6 +11,7 @@ #define HB_OPENCL_H #include "extras/cl.h" +#include "openclwrapper.h" // we only support OpenCL 1.1 or later #define HB_OCL_MINVERSION_MAJOR 1 @@ -654,15 +655,90 @@ typedef struct hb_opencl_library_s HB_OCL_FUNC_DECL(clReleaseContext); HB_OCL_FUNC_DECL(clReleaseEvent); HB_OCL_FUNC_DECL(clReleaseKernel); + HB_OCL_FUNC_DECL(clReleaseMemObject); HB_OCL_FUNC_DECL(clReleaseProgram); HB_OCL_FUNC_DECL(clSetKernelArg); HB_OCL_FUNC_DECL(clWaitForEvents); } hb_opencl_library_t; -int hb_opencl_library_open (hb_opencl_library_t *opencl); -void hb_opencl_library_close(hb_opencl_library_t *opencl); +hb_opencl_library_t* hb_opencl_library_init(); +void hb_opencl_library_close(hb_opencl_library_t **_opencl); + +/* + * Convenience pointer to a single shared OpenCL library wrapper. + * + * It can be initialized and closed via hb_ocl_init/close(). + */ +extern hb_opencl_library_t *hb_ocl; +int hb_ocl_init(); +void hb_ocl_close(); + +typedef struct hb_opencl_device_s +{ + cl_platform_id platform; + cl_device_type type; + cl_device_id id; + char version[128]; + char driver[128]; + char vendor[128]; + char name[128]; + enum + { + HB_OCL_VENDOR_AMD, + HB_OCL_VENDOR_NVIDIA, + HB_OCL_VENDOR_OTHER, + } ocl_vendor; +} hb_opencl_device_t; int hb_opencl_available(); void hb_opencl_info_print(); +/* OpenCL scaling */ +typedef struct hb_oclscale_s +{ + int initialized; + // bicubic scale weights + cl_mem bicubic_x_weights; + cl_mem bicubic_y_weights; + cl_float xscale; + cl_float yscale; + int width; + int height; + // horizontal scaling and vertical scaling kernel handle + cl_kernel m_kernel; + int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem +} hb_oclscale_t; + +int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, + hb_oclscale_t *os); + +/* Utilities */ +#define HB_OCL_BUF_CREATE(ocl_lib, out, flags, size) \ +{ \ + out = ocl_lib->clCreateBuffer(kenv->context, flags, size, NULL, &status); \ + if (CL_SUCCESS != status) \ + { \ + return -1; \ + } \ +} + +#define HB_OCL_BUF_FREE(ocl_lib, buf) \ +{ \ + if (buf != NULL) \ + { \ + ocl_lib->clReleaseMemObject(buf); \ + buf = NULL; \ + } \ +} + +#define HB_OCL_CHECK(method, ...) \ +{ \ + status = method(__VA_ARGS__); \ + if (status != CL_SUCCESS) \ + { \ + hb_error("%s:%d (%s) error: %d\n",__FUNCTION__,__LINE__,#method,status);\ + return status; \ + } \ +} + #endif//HB_OPENCL_H diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c index b5faf7041..7fb395aba 100644 --- a/libhb/openclwrapper.c +++ b/libhb/openclwrapper.c @@ -10,12 +10,11 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ -#ifdef USE_OPENCL - #include <stdio.h> #include <stdlib.h> #include <string.h> #include "extras/cl.h" +#include "opencl.h" #include "openclwrapper.h" #include "openclkernels.h" @@ -80,80 +79,12 @@ static int isInited = 0; static int useBuffers = 0; static hb_kernel_node gKernels[MAX_KERNEL_NUM]; -#define ADD_KERNEL_CFG( idx, s, p ){\ - strcpy( gKernels[idx].kernelName, s );\ - gKernels[idx].kernelStr = p;\ - strcpy( gpu_env.kernel_names[idx], s );\ - gpu_env.kernel_count++; } - - -/** - * hb_confirm_gpu_type - */ -int hb_confirm_gpu_type() -{ - int status = 1; - unsigned int i, j; - cl_uint numPlatforms = 0; - status = clGetPlatformIDs(0,NULL,&numPlatforms); - if(status != 0) - { - goto end; - } - if(numPlatforms > 0) - { - cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id)); - status = clGetPlatformIDs (numPlatforms, platforms, NULL); - if (status != 0) - { - goto end; - } - for (i=0; i < numPlatforms; i++) - { - char pbuff[100]; - cl_uint numDevices; - status = clGetPlatformInfo( platforms[i], - CL_PLATFORM_VENDOR, - sizeof (pbuff), - pbuff, - NULL); - if (status) - continue; - status = clGetDeviceIDs( platforms[i], - CL_DEVICE_TYPE_GPU , - 0 , - NULL , - &numDevices); - - cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); - status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); - for (j = 0; j < numDevices; j++) - { - char dbuff[100]; - status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); - if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") || - !strcmp(dbuff, "Intel(R) Corporation") || -#ifdef __APPLE__ - !strcmp(dbuff, "AMD") || - /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */ - !strcmp(dbuff, "NVIDIA") || - /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */ -#endif - !strcmp(dbuff, "NVIDIA Corporation")) - { - return 0; - } - } - - if ( status != CL_SUCCESS ) - continue; - if( numDevices ) - break; - } - free( platforms ); - } - end: - return -1; +#define HB_OCL_ADD_KERNEL_CFG(idx, s, p) \ +{ \ + strcpy(gKernels[idx].kernelName, s); \ + gKernels[idx].kernelStr = p; \ + strcpy(gpu_env.kernel_names[idx], s); \ + gpu_env.kernel_count++; \ } /** @@ -168,8 +99,8 @@ int hb_regist_opencl_kernel() gpu_env.file_count = 0; //argc; gpu_env.kernel_count = 0UL; - ADD_KERNEL_CFG( 0, "frame_scale", NULL ) - ADD_KERNEL_CFG( 1, "yadif_filter", NULL ) + HB_OCL_ADD_KERNEL_CFG(0, "frame_scale", NULL); + HB_OCL_ADD_KERNEL_CFG(1, "yadif_filter", NULL); return 0; } @@ -230,11 +161,14 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** char * str = NULL; FILE * fd = NULL; - status = clGetContextInfo( context, - CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL ); + if (hb_ocl == NULL) + { + hb_error("hb_binary_generated: OpenCL support not available"); + return 0; + } + + status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL); if( status != CL_SUCCESS ) { hb_log( "OpenCL: Get context info failed" ); @@ -249,11 +183,9 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** } /* grab the handles to all of the devices in the context. */ - status = clGetContextInfo( context, - CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL ); + status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, NULL); status = 0; /* dump out each binary into its own separate file. */ @@ -264,11 +196,8 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** if (devices[i]) { char deviceName[1024]; - status = clGetDeviceInfo(devices[i], - CL_DEVICE_NAME, - sizeof(deviceName), - deviceName, - NULL); + status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); str = (char*)strstr(cl_file_name, ".cl"); memcpy(cl_name, cl_file_name, str - cl_file_name); @@ -325,11 +254,14 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ char **binaries; char *str = NULL; - status = clGetProgramInfo( program, - CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL ); + if (hb_ocl == NULL) + { + hb_error("hb_generat_bin_from_kernel_source: OpenCL support not available"); + return 0; + } + + status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL); if( status != CL_SUCCESS ) { hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed"); @@ -344,11 +276,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ } /* grab the handles to all of the devices in the program. */ - status = clGetProgramInfo( program, - CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL ); + status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, NULL); if( status != CL_SUCCESS ) { hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed"); @@ -358,10 +288,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ /* figure out the sizes of each of the binaries. */ binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); - status = clGetProgramInfo( program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, - binarySizes, NULL ); + status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, + binarySizes, NULL); if( status != CL_SUCCESS ) { hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed"); @@ -393,11 +322,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ } } - status = clGetProgramInfo( program, - CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, - binaries, - NULL ); + status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, + binaries, NULL); if( status != CL_SUCCESS ) { hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed"); @@ -412,11 +339,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ if (binarySizes[i]) { char deviceName[1024]; - status = clGetDeviceInfo(devices[i], - CL_DEVICE_NAME, - sizeof(deviceName), - deviceName, - NULL); + status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, + NULL); str = (char*)strstr( cl_file_name, (char*)".cl" ); memcpy(cl_name, cl_file_name, str - cl_file_name); @@ -489,8 +414,15 @@ int hb_init_opencl_attr( OpenCLEnv * env ) int hb_create_kernel( char * kernelname, KernelEnv * env ) { int status; - env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status ); - env->context = gpu_env.context; + + if (hb_ocl == NULL) + { + hb_error("hb_create_kernel: OpenCL support not available"); + return 0; + } + + env->kernel = hb_ocl->clCreateKernel(gpu_env.programs[0], kernelname, &status); + env->context = gpu_env.context; env->command_queue = gpu_env.command_queue; return status != CL_SUCCESS ? 1 : 0; } @@ -501,7 +433,13 @@ int hb_create_kernel( char * kernelname, KernelEnv * env ) */ int hb_release_kernel( KernelEnv * env ) { - int status = clReleaseKernel( env->kernel ); + if (hb_ocl == NULL) + { + hb_error("hb_release_kernel: OpenCL support not available"); + return 0; + } + + int status = hb_ocl->clReleaseKernel(env->kernel); return status != CL_SUCCESS ? 1 : 0; } @@ -522,17 +460,23 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) unsigned int i; void *handle = INVALID_HANDLE_VALUE; - if (init_once != 0) return 0; else init_once = 1; + + if (hb_ocl == NULL) + { + hb_error("hb_init_opencl_env: OpenCL support not available"); + return 1; + } + /* * Have a look at the available platforms. */ if( !gpu_info->isUserCreated ) { - status = clGetPlatformIDs( 0, NULL, &numPlatforms ); + status = hb_ocl->clGetPlatformIDs(0, NULL, &numPlatforms); if( status != CL_SUCCESS ) { hb_log( "OpenCL: OpenCL device platform not found." ); @@ -548,7 +492,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) { return(1); } - status = clGetPlatformIDs( numPlatforms, platforms, NULL ); + status = hb_ocl->clGetPlatformIDs(numPlatforms, platforms, NULL); if( status != CL_SUCCESS ) { @@ -558,9 +502,8 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) for( i = 0; i < numPlatforms; i++ ) { - status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, - sizeof(platformName), platformName, - NULL ); + status = hb_ocl->clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, + sizeof(platformName), platformName, NULL); if( status != CL_SUCCESS ) { @@ -576,11 +519,10 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) gpu_info->platform = platforms[i]; - status = clGetDeviceIDs( gpu_info->platform /* platform */, - CL_DEVICE_TYPE_GPU /* device_type */, - 0 /* num_entries */, - NULL /* devices */, - &numDevices ); + status = hb_ocl->clGetDeviceIDs(gpu_info->platform /* platform */, + CL_DEVICE_TYPE_GPU /* device_type */, + 0 /* num_entries */, + NULL /* devices */, &numDevices); if( status != CL_SUCCESS ) { @@ -614,21 +556,21 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) cps[2] = 0; /* Check for GPU. */ gpu_info->dType = CL_DEVICE_TYPE_GPU; - gpu_info->context = clCreateContextFromType( - cps, gpu_info->dType, NULL, NULL, &status ); + gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType, + NULL, NULL, &status); if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) { gpu_info->dType = CL_DEVICE_TYPE_CPU; - gpu_info->context = clCreateContextFromType( - cps, gpu_info->dType, NULL, NULL, &status ); + gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType, + NULL, NULL, &status); } if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) { gpu_info->dType = CL_DEVICE_TYPE_DEFAULT; - gpu_info->context = clCreateContextFromType( - cps, gpu_info->dType, NULL, NULL, &status ); + gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType, + NULL, NULL, &status); } if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) @@ -639,8 +581,8 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) /* Detect OpenCL devices. */ /* First, get the size of device list data */ - status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, - 0, NULL, &length ); + status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, + 0, NULL, &length); if((status != CL_SUCCESS) || (length == 0)) { hb_log( "OpenCL: Unable to get the list of devices in context." ); @@ -655,8 +597,8 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) } /* Now, get the device list data */ - status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length, - gpu_info->devices, NULL ); + status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, + length, gpu_info->devices, NULL); if( status != CL_SUCCESS ) { hb_log( "OpenCL: Unable to get the device list data in context." ); @@ -664,9 +606,9 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) } /* Create OpenCL command queue. */ - gpu_info->command_queue = clCreateCommandQueue( gpu_info->context, - gpu_info->devices[0], - 0, &status ); + gpu_info->command_queue = hb_ocl->clCreateCommandQueue(gpu_info->context, + gpu_info->devices[0], + 0, &status); if( status != CL_SUCCESS ) { hb_log( "OpenCL: Unable to create opencl command queue." ); @@ -674,9 +616,10 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) } } - if( clGetCommandQueueInfo( gpu_info->command_queue, - CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle), - &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE ) + if ((CL_SUCCESS == hb_ocl->clGetCommandQueueInfo(gpu_info->command_queue, + CL_QUEUE_THREAD_HANDLE_AMD, + sizeof(handle), &handle, NULL)) && + (INVALID_HANDLE_VALUE != handle)) { #ifdef SYS_MINGW SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL ); @@ -697,29 +640,36 @@ int hb_release_opencl_env( GPUEnv *gpu_info ) return 1; int i; + if (hb_ocl == NULL) + { + hb_error("hb_release_opencl_env: OpenCL support not available"); + return 0; + } + for( i = 0; i<gpu_env.file_count; i++ ) { if( gpu_env.programs[i] ) ; { - clReleaseProgram( gpu_env.programs[i] ); + hb_ocl->clReleaseProgram(gpu_env.programs[i]); gpu_env.programs[i] = NULL; } } if( gpu_env.command_queue ) { - clReleaseCommandQueue( gpu_env.command_queue ); + hb_ocl->clReleaseCommandQueue(gpu_env.command_queue); gpu_env.command_queue = NULL; } if( gpu_env.context ) { - clReleaseContext( gpu_env.context ); + hb_ocl->clReleaseContext(gpu_env.context); gpu_env.context = NULL; } isInited = 0; gpu_info->isUserCreated = 0; + return 1; } @@ -813,13 +763,16 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, source = source_str; source_size[0] = strlen( source ); + if (hb_ocl == NULL) + { + hb_error("hb_compile_kernel_file: OpenCL support not available"); + return 0; + } + if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1) { - status = clGetContextInfo(gpu_info->context, - CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL); + status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL); if (status != CL_SUCCESS) { hb_log("OpenCL: Unable to get the number of devices in context."); @@ -852,19 +805,17 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, return 0; /* grab the handles to all of the devices in the context. */ - status = clGetContextInfo(gpu_info->context, - CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL); - - gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context, - numDevices, - devices, - &length, - (const unsigned char**)&binary, - &binary_status, - &status); + status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, NULL); + + gpu_info->programs[idx] = hb_ocl->clCreateProgramWithBinary(gpu_info->context, + numDevices, + devices, + &length, + (const unsigned char**)&binary, + &binary_status, + &status); fclose(fd); free(devices); @@ -874,8 +825,9 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, else { /* create a CL program using the kernel source */ - gpu_info->programs[idx] = clCreateProgramWithSource( - gpu_info->context, 1, &source, source_size, &status ); + gpu_info->programs[idx] = hb_ocl->clCreateProgramWithSource(gpu_info->context, 1, + &source, source_size, + &status); } if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){ @@ -886,28 +838,30 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, /* create a cl program executable for all the devices specified */ if( !gpu_info->isUserCreated ) { - status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices, - build_option, NULL, NULL ); + status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices, + build_option, NULL, NULL); } else { - status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev), - build_option, NULL, NULL ); + status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev), + build_option, NULL, NULL); } if( status != CL_SUCCESS ) { if( !gpu_info->isUserCreated ) { - status = clGetProgramBuildInfo( gpu_info->programs[idx], - gpu_info->devices[0], - CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], + gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, + 0, NULL, &length); } else { - status = clGetProgramBuildInfo( gpu_info->programs[idx], - gpu_info->dev, - CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], + gpu_info->dev, + CL_PROGRAM_BUILD_LOG, + 0, NULL, &length); } if( status != CL_SUCCESS ) @@ -924,13 +878,17 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, if( !gpu_info->isUserCreated ) { - status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0], - CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], + gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, + length, buildLog, &length); } else { - status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev, - CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], + gpu_info->dev, + CL_PROGRAM_BUILD_LOG, + length, buildLog, &length); } fd1 = fopen( "kernel-build.log", "w+" ); @@ -1083,7 +1041,14 @@ int hb_get_opencl_env() int hb_create_buffer( cl_mem *cl_Buf, int flags, int size ) { int status; - *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status ); + + if (hb_ocl == NULL) + { + hb_error("hb_create_buffer: OpenCL support not available"); + return 0; + } + + *cl_Buf = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status); if( status != CL_SUCCESS ) { @@ -1105,7 +1070,14 @@ int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size ) { int status; - status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 ); + if (hb_ocl == NULL) + { + hb_error("hb_read_opencl_suffer: OpenCL support not available"); + return 0; + } + + status = hb_ocl->clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, + CL_TRUE, 0, size, outbuf, 0, 0, 0); if( status != CL_SUCCESS ) { hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status ); @@ -1119,9 +1091,18 @@ int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size) { int status; int flags = CL_MEM_ALLOC_HOST_PTR; + + if (hb_ocl == NULL) + { + hb_error("hb_cl_create_mapped_buffer: OpenCL support not available"); + return 0; + } + //cl_event event; - *mem = clCreateBuffer(gpu_env.context, flags, size, NULL, &status); - *addr = clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL/*&event*/, &status); + *mem = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status); + *addr = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE, + CL_MAP_READ|CL_MAP_WRITE, 0, size, 0, + NULL, NULL/*&event*/, &status); //hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status); @@ -1131,9 +1112,17 @@ int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size) int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr) { cl_event event; - int status = clEnqueueUnmapMemObject(gpu_env.command_queue, mem, addr, 0, NULL, &event); + + if (hb_ocl == NULL) + { + hb_error("hb_cl_free_mapped_buffer: OpenCL support not available"); + return 0; + } + + int status = hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, mem, + addr, 0, NULL, &event); if (status == CL_SUCCESS) - clWaitForEvents(1, &event); + hb_ocl->clWaitForEvents(1, &event); else hb_log("hb_free_mapped_buffer: error %d", status); return (status == CL_SUCCESS) ? 1 : 0; @@ -1151,11 +1140,16 @@ int hb_use_buffers() int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb) { - int status = clEnqueueCopyBuffer(gpu_env.command_queue, - src_buffer, - dst_buffer, - src_offset, dst_offset, cb, - 0, 0, 0); + if (hb_ocl == NULL) + { + hb_error("hb_copy_buffer: OpenCL support not available"); + return 0; + } + + int status = hb_ocl->clEnqueueCopyBuffer(gpu_env.command_queue, + src_buffer, dst_buffer, + src_offset, dst_offset, + cb, 0, 0, 0); if( status != CL_SUCCESS ) { av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status ); @@ -1184,13 +1178,23 @@ int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned cha int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset) { int status; - void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0 + linesize1)*height + offset, 0, NULL, NULL, NULL ); + + if (hb_ocl == NULL) + { + hb_error("hb_write_opencl_frame_buffer: OpenCL support not available"); + return 0; + } + + void *mapped = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf, + CL_TRUE,CL_MAP_WRITE, 0, + sizeof(uint8_t) * (linesize0 + linesize1) * height + offset, + 0, NULL, NULL, NULL); uint8_t *temp = (uint8_t *)mapped; temp += offset; memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height); memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2); memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2); - clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL ); + hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL); return 1; } @@ -1203,4 +1207,3 @@ cl_context hb_get_context() { return gpu_env.context; } -#endif diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h index c7606afc0..0ccabd564 100644 --- a/libhb/openclwrapper.h +++ b/libhb/openclwrapper.h @@ -11,10 +11,11 @@ */ -#ifndef __OPENCL_WRAPPER_H -#define __OPENCL_WRAPPER_H -#ifdef USE_OPENCL +#ifndef HB_OPENCL_WRAPPER_H +#define HB_OPENCL_WRAPPER_H + #include "common.h" +#include "extras/cl.h" //support AMD opencl #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E @@ -85,5 +86,5 @@ int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr); int hb_use_buffers(); int hb_confirm_gpu_type(); -#endif -#endif + +#endif // HB_OPENCL_WRAPPER_H diff --git a/libhb/scan.c b/libhb/scan.c index 535baea39..f827245e1 100644 --- a/libhb/scan.c +++ b/libhb/scan.c @@ -8,6 +8,7 @@ */ #include "hb.h" +#include "opencl.h" #include "hbffmpeg.h" #include "a52dec/a52.h" @@ -868,6 +869,9 @@ skip_preview: title->video_decode_support = vid_info.video_decode_support; + // TODO: check video dimensions + title->opencl_support = !!hb_opencl_available(); + // compute the aspect ratio based on the storage dimensions and the // pixel aspect ratio (if supplied) or just storage dimensions if no PAR. title->aspect = (double)title->width / (double)title->height; diff --git a/libhb/stream.c b/libhb/stream.c index ac34cc550..5d6ec7f4c 100644 --- a/libhb/stream.c +++ b/libhb/stream.c @@ -1121,14 +1121,7 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title) #else title->hwd_support = 0; #endif -#ifdef USE_OPENCL - if (hb_confirm_gpu_type() == 0 && hb_opencl_available() == 1) - title->opencl_support = 1; - else - title->opencl_support = 0; -#else - title->opencl_support = 0; -#endif + // Height, width, rate and aspect ratio information is filled in // when the previews are built return title; @@ -5687,14 +5680,6 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title ) #else title->hwd_support = 0; #endif -#ifdef USE_OPENCL - if (hb_confirm_gpu_type() == 0 && hb_opencl_available() == 1) - title->opencl_support = 1; - else - title->opencl_support = 0; -#else - title->opencl_support = 0; -#endif return title; } diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c index 98ff41dcc..d173d4a08 100644 --- a/libhb/vadxva2.c +++ b/libhb/vadxva2.c @@ -10,14 +10,12 @@ Li Cao <[email protected]> <http://www.multicorewareinc.com/> */ -#include "vadxva2.h" -#ifdef USE_OPENCL +#ifdef USE_HWD + +#include "vadxva2.h" #include "extras/cl.h" #include "oclnv12toyuv.h" -#endif - -#ifdef USE_HWD static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); @@ -112,12 +110,11 @@ void hb_va_close( hb_va_dxva2_t *dxva2 ) if( dxva2->hd3d9_dll ) FreeLibrary( dxva2->hd3d9_dll ); -#ifdef USE_OPENCL if ( dxva2->nv12toyuv_tmp_in ) free( dxva2->nv12toyuv_tmp_in ); if ( dxva2->nv12toyuv_tmp_out ) free( dxva2->nv12toyuv_tmp_out ); -#endif + dxva2->description = NULL; free( dxva2 ); } @@ -775,4 +772,5 @@ int hb_check_hwd_fmt( int fmt ) } return result; } -#endif + +#endif // USE_HWD diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h index cd879b974..39165e188 100644 --- a/libhb/vadxva2.h +++ b/libhb/vadxva2.h @@ -11,15 +11,17 @@ */ -#ifndef VA_DXVA2_H -#define VA_DXVA2_H - #ifdef USE_HWD + +#ifndef HB_VA_DXVA2_H +#define HB_VA_DXVA2_H + #include "hbffmpeg.h" #include "d3d9.h" #include "libavcodec/dxva2.h" #include "dxva2api.h" #include "common.h" +#include "opencl.h" #include "openclwrapper.h" #define HB_FOURCC( a, b, c, d ) ( ((uint32_t)a) | ( ((uint32_t)b) << 8 ) | ( ((uint32_t)c) << 16 ) | ( ((uint32_t)d) << 24 ) ) @@ -133,21 +135,18 @@ typedef struct int do_job; // running nv12toyuv kernel. -#ifdef USE_OPENCL cl_kernel nv12toyuv; cl_mem cl_mem_nv12; cl_mem cl_mem_yuv; uint8_t * nv12toyuv_tmp_in; uint8_t * nv12toyuv_tmp_out; -#endif } hb_va_dxva2_t; typedef struct FilterLink_T { -#ifdef USE_OPENCL cl_mem cl_inbuf; cl_mem cl_outbuf; -#endif + uint8_t *mem_inbuf; uint8_t *mem_outbuf; int width; @@ -209,5 +208,7 @@ void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ); void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ); void hb_va_close( hb_va_dxva2_t *dxva2 ); int hb_check_hwd_fmt( int fmt ); -#endif -#endif + +#endif // HB_VA_DXVA2_H + +#endif // USE_HWD diff --git a/libhb/work.c b/libhb/work.c index b63925359..43dbdeaaa 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -11,6 +11,7 @@ #include "a52dec/a52.h" #include "libavformat/avformat.h" #include "openclwrapper.h" +#include "opencl.h" #ifdef USE_QSV #include "qsv_common.h" @@ -539,13 +540,13 @@ static void do_job(hb_job_t *job) job->list_work = hb_list_init(); -#ifdef USE_OPENCL - /* init opencl environment */ - if (job->use_opencl) - job->use_opencl = !hb_init_opencl_run_env(0, NULL, "-I."); -#else - job->use_opencl = 0; -#endif + /* OpenCL */ + if (job->use_opencl && (hb_ocl_init() || hb_init_opencl_run_env(0, NULL, "-I."))) + { + hb_log("work: failed to initialize OpenCL environment, using fallback"); + job->use_opencl = 0; + hb_ocl_close(); + } hb_log( "starting job" ); @@ -856,9 +857,10 @@ static void do_job(hb_job_t *job) init.pix_fmt = AV_PIX_FMT_YUV420P; init.width = title->width; init.height = title->height; -#ifdef USE_OPENCL - init.use_dxva = hb_use_dxva( title ); -#endif + + /* DXVA2 */ + init.use_dxva = hb_use_dxva(title); + init.par_width = job->anamorphic.par_width; init.par_height = job->anamorphic.par_height; memcpy(init.crop, title->crop, sizeof(int[4])); @@ -1615,6 +1617,12 @@ cleanup: hb_buffer_pool_free(); hb_job_close( &job ); + + /* OpenCL: must be closed *after* freeing the buffer pool */ + if (job->use_opencl) + { + hb_ocl_close(); + } } static inline void copy_chapter( hb_buffer_t * dst, hb_buffer_t * src ) |