summaryrefslogtreecommitdiffstats
path: root/libhb
diff options
context:
space:
mode:
Diffstat (limited to 'libhb')
-rw-r--r--libhb/common.h61
-rw-r--r--libhb/cropscale.c125
-rw-r--r--libhb/decavcodec.c19
-rw-r--r--libhb/fifo.c101
-rw-r--r--libhb/hb.c5
-rw-r--r--libhb/internal.h4
-rw-r--r--libhb/module.defs7
-rw-r--r--libhb/oclnv12toyuv.c77
-rw-r--r--libhb/oclnv12toyuv.h15
-rw-r--r--libhb/oclscale.c167
-rw-r--r--libhb/opencl.c383
-rw-r--r--libhb/opencl.h80
-rw-r--r--libhb/openclwrapper.c399
-rw-r--r--libhb/openclwrapper.h11
-rw-r--r--libhb/scan.c4
-rw-r--r--libhb/stream.c17
-rw-r--r--libhb/vadxva2.c14
-rw-r--r--libhb/vadxva2.h19
-rw-r--r--libhb/work.c28
19 files changed, 857 insertions, 679 deletions
diff --git a/libhb/common.h b/libhb/common.h
index 6a657465c..b57103d06 100644
--- a/libhb/common.h
+++ b/libhb/common.h
@@ -21,9 +21,6 @@
#include <sys/stat.h>
#include <dirent.h>
-#ifdef USE_OPENCL
-#include "extras/cl.h"
-#endif
/*
* It seems WinXP doesn't align the stack of new threads to 16 bytes.
* To prevent crashes in SSE functions, we need to force stack alignement
@@ -537,10 +534,11 @@ struct hb_job_s
uint32_t frames_to_skip; // decode but discard this many frames
// initially (for frame accurate positioning
// to non-I frames).
- int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */
+ int use_opencl;
int use_hwd;
int use_decomb;
int use_detelecine;
+
#ifdef USE_QSV
// QSV-specific settings
struct
@@ -920,10 +918,12 @@ struct hb_title_s
uint32_t flags;
// set if video stream doesn't have IDR frames
- int opencl_support;
- int hwd_support;
#define HBTF_NO_IDR (1 << 0)
#define HBTF_SCAN_COMPLETE (1 << 0)
+
+ // whether OpenCL scaling is supported for this source
+ int opencl_support;
+ int hwd_support; // TODO: merge to video_decode_support
};
// Update win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_state_s.cs when changing this struct
@@ -1094,51 +1094,7 @@ extern hb_work_object_t hb_reader;
#define HB_FILTER_DROP 3
#define HB_FILTER_DONE 4
-typedef struct hb_oclscale_s
-{
-#ifdef USE_OPENCL
- int initialized;
- // bicubic scale weights
- cl_mem bicubic_x_weights;
- cl_mem bicubic_y_weights;
- cl_float xscale;
- cl_float yscale;
- int width;
- int height;
- // horizontal scaling and vertical scaling kernel handle
- cl_kernel m_kernel;
- int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem
-#endif
-} hb_oclscale_t;
-
-#ifdef USE_OPENCL
-int hb_ocl_scale( hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os );
-#endif
-
-#ifdef USE_OPENCL
-int hb_use_dxva( hb_title_t * title );
-// create opencl buffer
-#define CREATEBUF( out, flags, size )\
- {\
- out = clCreateBuffer( kenv->context, (flags), (size), NULL, &status );\
- if( CL_SUCCESS != status ) return -1;\
- }
-
-#define OCLCHECK( method, ... )\
- status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\
- hb_error("%s:%d (%s) error: %d\n", __FUNCTION__, __LINE__, #method, status); return status; }
-
-#define CL_FREE( buf )\
-{\
- if( buf )\
- {\
- { clReleaseMemObject( buf ); }\
- buf = NULL;\
- }\
-}
-
-
-#endif
+int hb_use_dxva(hb_title_t *title);
typedef struct hb_filter_init_s
{
@@ -1152,10 +1108,7 @@ typedef struct hb_filter_init_s
int vrate_base;
int vrate;
int cfr;
-#ifdef USE_OPENCL
int use_dxva;
-#endif
-
} hb_filter_init_t;
typedef struct hb_filter_info_s
diff --git a/libhb/cropscale.c b/libhb/cropscale.c
index 66aec7f27..f9227562a 100644
--- a/libhb/cropscale.c
+++ b/libhb/cropscale.c
@@ -10,7 +10,7 @@
#include "hb.h"
#include "hbffmpeg.h"
#include "common.h"
-
+#include "opencl.h"
struct hb_filter_private_s
{
@@ -23,12 +23,12 @@ struct hb_filter_private_s
int height_out;
int crop[4];
-#ifdef USE_OPENCL
+ /* OpenCL/DXVA2 */
int use_dxva;
int use_decomb;
int use_detelecine;
- hb_oclscale_t *os; //ocl scaler handler
-#endif
+ hb_oclscale_t *os; //ocl scaler handler
+
struct SwsContext * context;
};
@@ -69,17 +69,18 @@ static int hb_crop_scale_init( hb_filter_object_t * filter,
pv->height_in = init->height;
pv->width_out = init->width - (init->crop[2] + init->crop[3]);
pv->height_out = init->height - (init->crop[0] + init->crop[1]);
-#ifdef USE_OPENCL
- pv->use_dxva = init->use_dxva;
- pv->use_decomb = init->job->use_decomb;
+
+ /* OpenCL/DXVA2 */
+ pv->use_dxva = init->use_dxva;
+ pv->use_decomb = init->job->use_decomb;
pv->use_detelecine = init->job->use_detelecine;
- if( pv->job->use_opencl && pv->job->title->opencl_support)
+ if (pv->job->use_opencl && pv->job->title->opencl_support)
{
pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
memset( pv->os, 0, sizeof( hb_oclscale_t ) );
}
-#endif
+
memcpy( pv->crop, init->crop, sizeof( int[4] ) );
if( filter->settings )
{
@@ -93,9 +94,6 @@ static int hb_crop_scale_init( hb_filter_object_t * filter,
init->width = pv->width_out;
init->height = pv->height_out;
memcpy( init->crop, pv->crop, sizeof( int[4] ) );
-#ifdef USE_OPENCL
- pv->use_dxva = init->use_dxva;
-#endif
return 0;
}
@@ -136,15 +134,18 @@ static void hb_crop_scale_close( hb_filter_object_t * filter )
{
return;
}
-#ifdef USE_OPENCL
- if( pv->job->use_opencl && pv->job->title->opencl_support && pv->os )
+ /* OpenCL */
+ if (pv->job->use_opencl && pv->job->title->opencl_support && pv->os)
{
- CL_FREE( pv->os->bicubic_x_weights );
- CL_FREE( pv->os->bicubic_y_weights );
- free( pv->os );
+ if (hb_ocl != NULL)
+ {
+ HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_x_weights);
+ HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_y_weights);
+ }
+ free(pv->os);
}
-#endif
+
if( pv->context )
{
sws_freeContext( pv->context );
@@ -154,7 +155,7 @@ static void hb_crop_scale_close( hb_filter_object_t * filter )
filter->private_data = NULL;
}
-#ifdef USE_OPENCL
+/* OpenCL */
static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h )
{
if( dstride == sstride )
@@ -171,7 +172,6 @@ static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride
}
return dst;
}
-#endif
static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
{
@@ -189,45 +189,44 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
av_picture_crop( &pic_crop, &pic_in, in->f.fmt,
pv->crop[0], pv->crop[2] );
-#ifdef USE_OPENCL
// Use bicubic OpenCL scaling when selected and when downsampling < 4:1;
- if ((pv->job->use_opencl && pv->job->title->opencl_support) && (pv->width_out * 4 > pv->width_in) && (in->cl.buffer != NULL) && (out->cl.buffer != NULL))
+ if ((pv->job->use_opencl && pv->job->title->opencl_support) &&
+ (pv->width_out * 4 > pv->width_in) &&
+ (in->cl.buffer != NULL) && (out->cl.buffer != NULL))
{
+ /* OpenCL */
hb_ocl_scale(in, out, pv->crop, pv->os);
}
else
{
-#endif
- if ( !pv->context ||
- pv->width_in != in->f.width ||
- pv->height_in != in->f.height ||
- pv->pix_fmt != in->f.fmt )
- {
- // Something changed, need a new scaling context.
- if( pv->context )
- sws_freeContext( pv->context );
-
- pv->context = hb_sws_get_context(
- in->f.width - (pv->crop[2] + pv->crop[3]),
- in->f.height - (pv->crop[0] + pv->crop[1]),
- in->f.fmt,
- out->f.width, out->f.height, out->f.fmt,
- SWS_LANCZOS | SWS_ACCURATE_RND );
- pv->width_in = in->f.width;
- pv->height_in = in->f.height;
- pv->pix_fmt = in->f.fmt;
+ if (pv->context == NULL ||
+ pv->width_in != in->f.width ||
+ pv->height_in != in->f.height ||
+ pv->pix_fmt != in->f.fmt)
+ {
+ // Something changed, need a new scaling context.
+ if (pv->context != NULL)
+ {
+ sws_freeContext(pv->context);
+ }
+
+ pv->context = hb_sws_get_context(in->f.width - (pv->crop[2] + pv->crop[3]),
+ in->f.height - (pv->crop[0] + pv->crop[1]),
+ in->f.fmt, out->f.width, out->f.height,
+ out->f.fmt, SWS_LANCZOS|SWS_ACCURATE_RND);
+ pv->width_in = in->f.width;
+ pv->height_in = in->f.height;
+ pv->pix_fmt = in->f.fmt;
+ }
+
+ // Scale pic_crop into pic_render according to the
+ // context set up above
+ sws_scale(pv->context,
+ (const uint8_t* const*)pic_crop.data, pic_crop.linesize,
+ 0, in->f.height - (pv->crop[0] + pv->crop[1]),
+ pic_out.data, pic_out.linesize);
}
- // Scale pic_crop into pic_render according to the
- // context set up above
- sws_scale(pv->context,
- (const uint8_t* const*)pic_crop.data,
- pic_crop.linesize,
- 0, in->f.height - (pv->crop[0] + pv->crop[1]),
- pic_out.data, pic_out.linesize);
-#ifdef USE_OPENCL
- }
-#endif
out->s = in->s;
hb_buffer_move_subs( out, in );
return out;
@@ -261,27 +260,19 @@ static int hb_crop_scale_work( hb_filter_object_t * filter,
pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]);
pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]);
}
-#ifdef USE_OPENCL
- if ( (in->f.fmt == pv->pix_fmt_out &&
- !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
- in->f.width == pv->width_out && in->f.height == pv->height_out) &&
- (pv->use_decomb == 0) && (pv->use_detelecine == 0) ||
- (pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out) )
- {
- *buf_out = in;
- *buf_in = NULL;
- return HB_FILTER_OK;
- }
-#else
- if ( in->f.fmt == pv->pix_fmt_out &&
+
+ /* OpenCL/DXVA2 */
+ if ((!pv->use_decomb && !pv->use_detelecine &&
!pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
- in->f.width == pv->width_out && in->f.height == pv->height_out )
+ in->f.fmt == pv->pix_fmt_out && in->f.width == pv->width_out &&
+ in->f.height == pv->height_out) || (pv->use_dxva &&
+ in->f.width == pv->width_out &&
+ in->f.height == pv->height_out))
{
*buf_out = in;
- *buf_in = NULL;
+ *buf_in = NULL;
return HB_FILTER_OK;
}
-#endif
*buf_out = crop_scale( pv, in );
diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c
index 0ade0da27..c961bd719 100644
--- a/libhb/decavcodec.c
+++ b/libhb/decavcodec.c
@@ -43,6 +43,7 @@
#include "audio_resample.h"
#ifdef USE_HWD
+#include "opencl.h"
#include "vadxva2.h"
#endif
@@ -393,19 +394,19 @@ static void closePrivData( hb_work_private_t ** ppv )
hb_audio_resample_free(pv->resample);
#ifdef USE_HWD
- if ( pv->opencl_scale )
+ if (pv->opencl_scale != NULL)
{
- free( pv->opencl_scale );
+ free(pv->opencl_scale);
}
-
- if ( pv->dxva2 )
+ if (pv->dxva2 != NULL)
{
-#ifdef USE_OPENCL
- CL_FREE( pv->dxva2->cl_mem_nv12 );
-#endif
- hb_va_close( pv->dxva2 );
+ if (hb_ocl != NULL)
+ {
+ HB_OCL_BUF_FREE(hb_ocl, pv->dxva2->cl_mem_nv12);
+ }
+ hb_va_close(pv->dxva2);
}
-#endif
+#endif
#ifdef USE_QSV_PTS_WORKAROUND
if (pv->qsv.decode && pv->qsv.pts_list != NULL)
diff --git a/libhb/fifo.c b/libhb/fifo.c
index fbfcefd02..6887e43c7 100644
--- a/libhb/fifo.c
+++ b/libhb/fifo.c
@@ -250,14 +250,20 @@ void hb_buffer_pool_free( void )
if( b->data )
{
freed += b->alloc;
-#ifdef USE_OPENCL
- if (b->cl.buffer != NULL) {
+
+ if (b->cl.buffer != NULL)
+ {
+ /* OpenCL */
if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0)
- hb_log("bad free: %.16x -> buffer %.16x map %.16x", b, b->cl.buffer, b->data);
+ {
+ hb_log("hb_buffer_pool_free: bad free: %.16x -> buffer %.16x map %.16x",
+ b, b->cl.buffer, b->data);
+ }
}
else
-#endif
- free( b->data );
+ {
+ free(b->data);
+ }
}
free( b );
count++;
@@ -303,17 +309,18 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped )
{
b = hb_fifo_get( buffer_pool );
-#ifdef USE_OPENCL
- if (b && (needsMapped != 0) && (b->cl.buffer == NULL))
+ /* OpenCL */
+ if (b != NULL && needsMapped && b->cl.buffer == NULL)
{
// We need a mapped OpenCL buffer and that is not what we got out of the pool.
- // Ditch it. It will get replaced with what we need.
- if (b->data)
+ // Ditch it; it will get replaced with what we need.
+ if (b->data != NULL)
+ {
free(b->data);
+ }
free(b);
b = NULL;
}
-#endif
if( b )
{
@@ -322,11 +329,11 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped )
* didn't have to do this.
*/
uint8_t *data = b->data;
-#ifdef USE_OPENCL
- cl_mem buffer = b->cl.buffer;
+
+ /* OpenCL */
+ cl_mem buffer = b->cl.buffer;
cl_event last_event = b->cl.last_event;
- int loc = b->cl.buffer_location;
-#endif
+ int loc = b->cl.buffer_location;
memset( b, 0, sizeof(hb_buffer_t) );
b->alloc = buffer_pool->buffer_size;
@@ -335,11 +342,12 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped )
b->s.start = -1;
b->s.stop = -1;
b->s.renderOffset = -1;
-#ifdef USE_OPENCL
- b->cl.buffer = buffer;
- b->cl.last_event = last_event;
+
+ /* OpenCL */
+ b->cl.buffer = buffer;
+ b->cl.last_event = last_event;
b->cl.buffer_location = loc;
-#endif
+
return( b );
}
}
@@ -358,19 +366,18 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped )
if (size)
{
-#ifdef USE_OPENCL
- b->cl.last_event = NULL;
+ /* OpenCL */
+ b->cl.last_event = NULL;
b->cl.buffer_location = HOST;
- if (needsMapped != 0)
+ /* OpenCL */
+ if (needsMapped)
{
- int status;
- status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc);
- //hb_log("buf: %.16x -> buffer %.16x map %.16x size %d", b, b->cl.buffer, b->data, size);
+ int status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc);
}
- else {
+ else
+ {
b->cl.buffer = NULL;
-#endif
#if defined( SYS_DARWIN ) || defined( SYS_FREEBSD ) || defined( SYS_MINGW )
b->data = malloc( b->alloc );
@@ -380,9 +387,7 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped )
#else
b->data = memalign( 16, b->alloc );
#endif
-#ifdef USE_OPENCL
}
-#endif
if( !b->data )
{
@@ -533,11 +538,10 @@ hb_buffer_t * hb_frame_buffer_init( int pix_fmt, int width, int height )
hb_image_height_stride( pix_fmt, height, p );
}
}
-#ifdef USE_OPENCL
- buf = hb_buffer_init_internal( size , hb_use_buffers() );
-#else
- buf = hb_buffer_init( size );
-#endif
+
+ /* OpenCL */
+ buf = hb_buffer_init_internal(size , hb_use_buffers());
+
if( buf == NULL )
return NULL;
@@ -590,22 +594,22 @@ void hb_buffer_swap_copy( hb_buffer_t *src, hb_buffer_t *dst )
uint8_t *data = dst->data;
int size = dst->size;
int alloc = dst->alloc;
-#ifdef USE_OPENCL
- cl_mem buffer = dst->cl.buffer;
+
+ /* OpenCL */
+ cl_mem buffer = dst->cl.buffer;
cl_event last_event = dst->cl.last_event;
- int loc = dst->cl.buffer_location;
-#endif
+ int loc = dst->cl.buffer_location;
*dst = *src;
src->data = data;
src->size = size;
src->alloc = alloc;
-#ifdef USE_OPENCL
- src->cl.buffer = buffer;
- src->cl.last_event = last_event;
+
+ /* OpenCL */
+ src->cl.buffer = buffer;
+ src->cl.last_event = last_event;
src->cl.buffer_location = loc;
-#endif
}
// Frees the specified buffer list.
@@ -633,14 +637,19 @@ void hb_buffer_close( hb_buffer_t ** _b )
// free the buf
if( b->data )
{
-#ifdef USE_OPENCL
- if (b->cl.buffer != NULL) {
+ if (b->cl.buffer != NULL)
+ {
+ /* OpenCL */
if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0)
- hb_log("bad free2: %.16x -> buffer %.16x map %.16x", b, b->cl.buffer, b->data);
+ {
+ hb_log("hb_buffer_pool_free: bad free %.16x -> buffer %.16x map %.16x",
+ b, b->cl.buffer, b->data);
}
+ }
else
-#endif
- free( b->data );
+ {
+ free(b->data);
+ }
hb_lock(buffers.lock);
buffers.allocated -= b->alloc;
hb_unlock(buffers.lock);
diff --git a/libhb/hb.c b/libhb/hb.c
index 857ffa1d2..73356595e 100644
--- a/libhb/hb.c
+++ b/libhb/hb.c
@@ -441,11 +441,6 @@ hb_handle_t * hb_init( int verbose, int update_check )
h->interjob = calloc( sizeof( hb_interjob_t ), 1 );
- /* opencl */
-#ifdef USE_OPENCL
- //hb_opencl_init(); // FIXME: Ensure gui instances call this or hb_get_opencl_env() during startup if needed.
-#endif
-
/* Start library thread */
hb_log( "hb_init: starting libhb thread" );
h->die = 0;
diff --git a/libhb/internal.h b/libhb/internal.h
index e4d5ed71c..3920cf025 100644
--- a/libhb/internal.h
+++ b/libhb/internal.h
@@ -8,6 +8,7 @@
*/
#include "hbffmpeg.h"
+#include "extras/cl.h"
/***********************************************************************
* common.c
@@ -121,14 +122,13 @@ struct hb_buffer_s
void *filter_details;
} qsv_details;
-#ifdef USE_OPENCL
+ /* OpenCL */
struct cl_data
{
cl_mem buffer;
cl_event last_event;
enum { HOST, DEVICE } buffer_location;
} cl;
-#endif
// PICTURESUB subtitle packets:
diff --git a/libhb/module.defs b/libhb/module.defs
index 297a1b4ed..3102beafa 100644
--- a/libhb/module.defs
+++ b/libhb/module.defs
@@ -41,9 +41,6 @@ endif
ifeq (1,$(FEATURE.fdk_aac))
LIBHB.GCC.D += USE_FDK_AAC
endif
-ifeq (1,$(FEATURE.opencl))
-LIBHB.GCC.D += USE_OPENCL
-endif
ifeq (1,$(FEATURE.hwd))
LIBHB.GCC.D += USE_HWD
@@ -67,10 +64,6 @@ endif
LIBHB.GCC.D += __LIBHB__ USE_PTHREAD
LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include
-ifeq (1,$(FEATURE.opencl))
-LIBHB.GCC.l += OpenCL
-endif
-
ifeq ($(BUILD.system),cygwin)
LIBHB.GCC.D += SYS_CYGWIN
else ifeq ($(BUILD.system),darwin)
diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c
index 19188e5a3..0f4ee337c 100644
--- a/libhb/oclnv12toyuv.c
+++ b/libhb/oclnv12toyuv.c
@@ -10,8 +10,9 @@
Li Cao <[email protected]> <http://www.multicorewareinc.com/>
*/
-#ifdef USE_OPENCL
#ifdef USE_HWD
+
+#include "opencl.h"
#include "vadxva2.h"
#include "oclnv12toyuv.h"
@@ -50,10 +51,16 @@ static int hb_nv12toyuv_reg_kernel( void );
*/
static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 )
{
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available");
+ return 1;
+ }
+
cl_int status = CL_SUCCESS;
int in_bytes = w*h*3/2;
- CREATEBUF( dxva2->cl_mem_nv12, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, in_bytes );
- CREATEBUF( dxva2->cl_mem_yuv, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, in_bytes );
+ HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_nv12, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, in_bytes);
+ HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_yuv, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, in_bytes);
return 0;
}
@@ -62,8 +69,14 @@ static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva
*/
static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 )
{
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available");
+ return 1;
+ }
+
int ret;
- dxva2->nv12toyuv = clCreateKernel( kenv->program, "nv12toyuv", &ret );
+ dxva2->nv12toyuv = hb_ocl->clCreateKernel(kenv->program, "nv12toyuv", &ret);
return ret;
}
@@ -74,10 +87,17 @@ static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2
{
int arg = 0, status;
kenv->kernel = dxva2->nv12toyuv;
- OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12 );
- OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv );
- OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w );
- OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h );
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_nv12toyuv_setkernelarg: OpenCL support not available");
+ return 1;
+ }
+
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h);
return 0;
}
@@ -163,10 +183,19 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv )
return -1;
}
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_nv12toyuv: OpenCL support not available");
+ return -1;
+ }
+
int in_bytes = w*h*3/2;
if( kenv->isAMD )
{
- void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_nv12, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL );
+ void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue,
+ dxva2->cl_mem_nv12,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL);
for ( i = 0; i < dxva2->height; i++ )
{
@@ -176,7 +205,8 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv )
memcpy( data + ( dxva2->width * dxva2->height ) + i * dxva2->width, bufi2 + i * p, dxva2->width );
}
}
- clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_nv12, data, 0, NULL, NULL );
+ hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, dxva2->cl_mem_nv12,
+ data, 0, NULL, NULL);
}
else
{
@@ -189,18 +219,22 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv )
memcpy( tmp + (dxva2->width * dxva2->height) + i * dxva2->width, bufi2 + i * p, dxva2->width );
}
}
- OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL );
+ HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue,
+ dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL);
free( tmp );
}
size_t gdim[2] = {w>>1, h>>1};
- OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL );
+ HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue,
+ kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL );
if( (crop[0] || crop[1] || crop[2] || crop[3]) && (decomb == 0) && (detelecine == 0) )
{
AVPicture pic_in;
AVPicture pic_crop;
- clEnqueueReadBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out, 0, NULL, NULL );
+ hb_ocl->clEnqueueReadBuffer(kenv->command_queue, dxva2->cl_mem_yuv,
+ CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out,
+ 0, NULL, NULL);
hb_buffer_t *in = hb_video_buffer_init( w, h );
int wmp = in->plane[0].stride;
@@ -226,13 +260,20 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv )
if( kenv->isAMD )
{
- void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, ww * hh * 3 / 2, 0, NULL, NULL, NULL );
+ void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue,
+ dxva2->cl_mem_yuv,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ CL_TRUE, 0, ww * hh * 3 / 2, 0,
+ NULL, NULL, NULL);
memcpy( data, dxva2->nv12toyuv_tmp_in, ww * hh * 3 / 2 );
- clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_yuv, data, 0, NULL, NULL );
+ hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue,
+ dxva2->cl_mem_yuv, data, 0, NULL, NULL);
}
else
{
- OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_in, 0, NULL, NULL );
+ HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue,
+ dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes,
+ dxva2->nv12toyuv_tmp_in, 0, NULL, NULL);
}
hb_buffer_close( &in );
@@ -281,5 +322,5 @@ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxv
}
return 0;
}
-#endif
-#endif
+
+#endif // USE_HWD
diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h
index 4fa596662..de9282bfa 100644
--- a/libhb/oclnv12toyuv.h
+++ b/libhb/oclnv12toyuv.h
@@ -11,20 +11,17 @@
*/
-#ifdef USE_OPENCL
-#ifndef RENDER_CL_H
-#define RENDER_CL_H
+#ifndef HB_OCLNV12TOYUV_H
+#define HB_OCLNV12TOYUV_H
#include "common.h"
#include "extras/cl.h"
#include "openclwrapper.h"
-/**
+/*
* nv12 to yuv interface
* bufi is input frame of nv12, w is input frame width, h is input frame height
*/
-#ifdef USE_HWD
-int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2, int decomb, int detelecine );
-#endif
-#endif
-#endif
+int hb_ocl_nv12toyuv(uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2, int decomb, int detelecine);
+
+#endif // HB_OCLNV12TOYUV_H
diff --git a/libhb/oclscale.c b/libhb/oclscale.c
index da05f371e..eb59eaa66 100644
--- a/libhb/oclscale.c
+++ b/libhb/oclscale.c
@@ -1,20 +1,19 @@
-/* oclscale.c
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
- */
-
-#ifdef USE_OPENCL
-
+/* oclscale.c
+
+ Copyright (c) 2003-2012 HandBrake Team
+ This file is part of the HandBrake source code
+ Homepage: <http://handbrake.fr/>.
+ It may be used under the terms of the GNU General Public License v2.
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
+
+ Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
+ Li Cao <[email protected]> <http://www.multicorewareinc.com/>
+
+ */
+
#include <math.h>
#include "common.h"
+#include "opencl.h"
#include "openclwrapper.h"
#define FILTER_LEN 4
@@ -72,14 +71,20 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv )
int crop_bottom = data[3];
int crop_left = data[4];
int crop_right = data[5];
- cl_int in_frame_w = (int)data[6];
- cl_int in_frame_h = (int)data[7];
- cl_int out_frame_w = (int)data[8];
+ cl_int in_frame_w = (int)data[6];
+ cl_int in_frame_h = (int)data[7];
+ cl_int out_frame_w = (int)data[8];
cl_int out_frame_h = (int)data[9];
hb_oclscale_t *os = data[10];
hb_buffer_t *in = data[11];
hb_buffer_t *out = data[12];
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_ocl_scale_func: OpenCL support not available");
+ return 0;
+ }
+
if (os->initialized == 0)
{
hb_log( "Scaling With OpenCL" );
@@ -87,7 +92,7 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv )
hb_log( "Using Zero Copy");
// create the block kernel
cl_int status;
- os->m_kernel = clCreateKernel( kenv->program, "frame_scale", &status );
+ os->m_kernel = hb_ocl->clCreateKernel(kenv->program, "frame_scale", &status);
os->initialized = 1;
}
@@ -98,8 +103,12 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv )
int eventCount = 0;
if (kenv->isAMD == 0) {
- status = clEnqueueUnmapMemObject(kenv->command_queue, in->cl.buffer, in->data, 0, NULL, &events[eventCount++]);
- status = clEnqueueUnmapMemObject(kenv->command_queue, out->cl.buffer, out->data, 0, NULL, &events[eventCount++]);
+ status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue,
+ in->cl.buffer, in->data, 0,
+ NULL, &events[eventCount++]);
+ status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue,
+ out->cl.buffer, out->data, 0,
+ NULL, &events[eventCount++]);
}
cl_int srcPlaneOffset0 = in->plane[0].data - in->data;
@@ -127,28 +136,28 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv )
cl_float yscale = (out_frame_h * 1.0f) / in_frame_h;
setupScaleWeights(xscale, yscale, out_frame_w, out_frame_h, os, kenv);
- OCLCHECK( clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf );
- OCLCHECK( clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf );
- OCLCHECK( clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale );
- OCLCHECK( clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale );
- OCLCHECK( clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2 );
- OCLCHECK( clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w );
- OCLCHECK( clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h );
- OCLCHECK( clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w );
- OCLCHECK( clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h );
- OCLCHECK( clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights );
- OCLCHECK( clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights );
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights);
+ HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights);
size_t workOffset[] = { 0, 0, 0 };
size_t globalWorkSize[] = { 1, 1, 1 };
@@ -164,42 +173,70 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv )
globalWorkSize[1] = ygroups;
globalWorkSize[2] = 3;
- OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, eventCount, (eventCount == 0) ? NULL : &events[0], &events[eventCount] );
+ HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue,
+ os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize,
+ eventCount, eventCount == 0 ? NULL : &events[0], &events[eventCount]);
++eventCount;
if (kenv->isAMD == 0) {
- in->data = clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, in->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount], &status);
- out->data = clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, out->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount + 1], &status);
+ in->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer,
+ CL_FALSE, CL_MAP_READ|CL_MAP_WRITE,
+ 0, in->alloc,
+ eventCount ? 1 : 0,
+ eventCount ? &events[eventCount - 1] : NULL,
+ &events[eventCount], &status);
+ out->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer,
+ CL_FALSE, CL_MAP_READ|CL_MAP_WRITE,
+ 0, out->alloc,
+ eventCount ? 1 : 0,
+ eventCount ? &events[eventCount - 1] : NULL,
+ &events[eventCount + 1], &status);
eventCount += 2;
}
- clFlush(kenv->command_queue);
- clWaitForEvents(eventCount, &events[0]);
+ hb_ocl->clFlush(kenv->command_queue);
+ hb_ocl->clWaitForEvents(eventCount, &events[0]);
int i;
for (i = 0; i < eventCount; ++i)
- clReleaseEvent(events[i]);
+ {
+ hb_ocl->clReleaseEvent(events[i]);
+ }
}
return 1;
}
-int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) {
+int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv)
+{
cl_int status;
- if (os->xscale != xscale || os->width < width) {
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("setupScaleWeights: OpenCL support not available");
+ return 1;
+ }
+
+ if (os->xscale != xscale || os->width < width)
+ {
cl_float *xweights = hb_bicubic_weights(xscale, width);
- CL_FREE(os->bicubic_x_weights);
- CREATEBUF(os->bicubic_x_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * width * 4);
- OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL );
+ HB_OCL_BUF_FREE (hb_ocl, os->bicubic_x_weights);
+ HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_x_weights, CL_MEM_READ_ONLY,
+ sizeof(cl_float) * width * 4);
+ HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights,
+ CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL);
os->width = width;
os->xscale = xscale;
free(xweights);
}
- if ((os->yscale != yscale) || (os->height < height)) {
+ if ((os->yscale != yscale) || (os->height < height))
+ {
cl_float *yweights = hb_bicubic_weights(yscale, height);
- CL_FREE(os->bicubic_y_weights);
- CREATEBUF(os->bicubic_y_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * height * 4);
- OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL );
+ HB_OCL_BUF_FREE (hb_ocl, os->bicubic_y_weights);
+ HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_y_weights, CL_MEM_READ_ONLY,
+ sizeof(cl_float) * height * 4);
+ HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights,
+ CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL);
os->height = height;
os->yscale = yscale;
free(yweights);
@@ -211,10 +248,10 @@ int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, h
/**
* function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm
* parameter:
-* inputFrameBuffer: the source video frame opencl buffer
-* outputdata: the destination video frame buffer
-* inputWidth: the width of the source video frame
-* inputHeight: the height of the source video frame
+* inputFrameBuffer: the source video frame opencl buffer
+* outputdata: the destination video frame buffer
+* inputWidth: the width of the source video frame
+* inputHeight: the height of the source video frame
* outputWidth: the width of destination video frame
* outputHeight: the height of destination video frame
*/
@@ -263,9 +300,3 @@ int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os
hb_log( "run kernel[%s] failed", "frame_scale" );
return 0;
}
-
-
-
-
-
-#endif
diff --git a/libhb/opencl.c b/libhb/opencl.c
index cfc5e5747..8d846a824 100644
--- a/libhb/opencl.c
+++ b/libhb/opencl.c
@@ -26,10 +26,31 @@
#include "common.h"
#include "opencl.h"
-int hb_opencl_library_open(hb_opencl_library_t *opencl)
+hb_opencl_library_t *hb_ocl = NULL;
+
+int hb_ocl_init()
+{
+ if (hb_ocl == NULL)
+ {
+ if ((hb_ocl = hb_opencl_library_init()) == NULL)
+ {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+void hb_ocl_close()
+{
+ hb_opencl_library_close(&hb_ocl);
+}
+
+hb_opencl_library_t* hb_opencl_library_init()
{
- if (opencl == NULL)
+ hb_opencl_library_t *opencl;
+ if ((opencl = calloc(1, sizeof(hb_opencl_library_t))) == NULL)
{
+ hb_error("hb_opencl_library_init: memory allocation failure");
goto fail;
}
@@ -39,13 +60,13 @@ int hb_opencl_library_open(hb_opencl_library_t *opencl)
goto fail;
}
-#define HB_OCL_LOAD(func) \
-{ \
- if ((opencl->func = (void*)HB_OCL_DLSYM(opencl->library, #func)) == NULL) \
- { \
- hb_log("hb_opencl_library_open: failed to load function '%s'", #func); \
- goto fail; \
- } \
+#define HB_OCL_LOAD(func) \
+{ \
+ if ((opencl->func = (void*)HB_OCL_DLSYM(opencl->library, #func)) == NULL) \
+ { \
+ hb_log("hb_opencl_library_init: failed to load function '%s'", #func); \
+ goto fail; \
+ } \
}
HB_OCL_LOAD(clBuildProgram);
HB_OCL_LOAD(clCreateBuffer);
@@ -73,18 +94,27 @@ int hb_opencl_library_open(hb_opencl_library_t *opencl)
HB_OCL_LOAD(clReleaseContext);
HB_OCL_LOAD(clReleaseEvent);
HB_OCL_LOAD(clReleaseKernel);
+ HB_OCL_LOAD(clReleaseMemObject);
HB_OCL_LOAD(clReleaseProgram);
HB_OCL_LOAD(clSetKernelArg);
HB_OCL_LOAD(clWaitForEvents);
- return 0;
+
+ //success
+ return opencl;
fail:
- hb_opencl_library_close(opencl);
- return -1;
+ hb_opencl_library_close(&opencl);
+ return NULL;
}
-void hb_opencl_library_close(hb_opencl_library_t *opencl)
+void hb_opencl_library_close(hb_opencl_library_t **_opencl)
{
+ if (_opencl == NULL)
+ {
+ return;
+ }
+ hb_opencl_library_t *opencl = *_opencl;
+
if (opencl != NULL)
{
if (opencl->library != NULL)
@@ -120,114 +150,216 @@ void hb_opencl_library_close(hb_opencl_library_t *opencl)
HB_OCL_UNLOAD(clReleaseContext);
HB_OCL_UNLOAD(clReleaseEvent);
HB_OCL_UNLOAD(clReleaseKernel);
+ HB_OCL_UNLOAD(clReleaseMemObject);
HB_OCL_UNLOAD(clReleaseProgram);
HB_OCL_UNLOAD(clSetKernelArg);
HB_OCL_UNLOAD(clWaitForEvents);
}
+ *_opencl = NULL;
}
-static int hb_opencl_device_is_supported(cl_device_type type,
- const char *vendor,
- const char *version)
+static int hb_opencl_device_is_supported(hb_opencl_device_t* device)
{
- int major, minor;
+ // we only support OpenCL on GPUs for now
+ // FIXME: disable on NVIDIA to to a bug
+ if ((device != NULL) &&
+ (device->type & CL_DEVICE_TYPE_GPU) &&
+ (device->ocl_vendor != HB_OCL_VENDOR_NVIDIA))
+ {
+ int major, minor;
+ // check OpenCL version:
+ // OpenCL<space><major_version.minor_version><space><vendor-specific information>
+ if (sscanf(device->version, "OpenCL %d.%d", &major, &minor) != 2)
+ {
+ return 0;
+ }
+ return (major > HB_OCL_MINVERSION_MAJOR) || (major == HB_OCL_MINVERSION_MAJOR &&
+ minor >= HB_OCL_MINVERSION_MINOR);
+ }
+ return 0;
+}
+
+static hb_opencl_device_t* hb_opencl_device_get(hb_opencl_library_t *opencl,
+ cl_device_id device_id)
+{
+ if (opencl == NULL || opencl->clGetDeviceInfo == NULL)
+ {
+ hb_error("hb_opencl_device_get: OpenCL support not available");
+ return NULL;
+ }
+ else if (device_id == NULL)
+ {
+ hb_error("hb_opencl_device_get: invalid device ID");
+ return NULL;
+ }
+
+ hb_opencl_device_t *device = calloc(1, sizeof(hb_opencl_device_t));
+ if (device == NULL)
+ {
+ hb_error("hb_opencl_device_get: memory allocation failure");
+ return NULL;
+ }
+
+ cl_int status = CL_SUCCESS;
+ device->id = device_id;
- // we only support OpenCL on GPUs
- // disable on NVIDIA to to a bug (FIXME)
- if (!(type & CL_DEVICE_TYPE_GPU) ||
- !(strncmp(vendor, "NVIDIA", 6 /* strlen("NVIDIA") */)))
+ status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VENDOR, sizeof(device->vendor),
+ device->vendor, NULL);
+ status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_NAME, sizeof(device->name),
+ device->name, NULL);
+ status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VERSION, sizeof(device->version),
+ device->version, NULL);
+ status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_TYPE, sizeof(device->type),
+ &device->type, NULL);
+ status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_PLATFORM, sizeof(device->platform),
+ &device->platform, NULL);
+ status |= opencl->clGetDeviceInfo(device->id, CL_DRIVER_VERSION, sizeof(device->driver),
+ device->driver, NULL);
+ if (status != CL_SUCCESS)
{
- return 0;
+ free(device);
+ return NULL;
}
- // check OpenCL version; format:
- // OpenCL<space><major_version.minor_version><space><vendor-specific information>
- if (sscanf(version, "OpenCL %d.%d", &major, &minor) != 2)
+ if (!strcmp(device->vendor, "Advanced Micro Devices, Inc.") ||
+ !strcmp(device->vendor, "AMD"))
+ {
+ device->ocl_vendor = HB_OCL_VENDOR_AMD;
+ }
+ else if (!strncmp(device->vendor, "NVIDIA", 6 /* strlen("NVIDIA") */))
{
- return 0;
+ device->ocl_vendor = HB_OCL_VENDOR_NVIDIA;
+ }
+ else
+ {
+ device->ocl_vendor = HB_OCL_VENDOR_OTHER;
}
- return (major > HB_OCL_MINVERSION_MAJOR) || (major == HB_OCL_MINVERSION_MAJOR &&
- minor >= HB_OCL_MINVERSION_MINOR);
+ return device;
}
-int hb_opencl_available()
+static void hb_opencl_devices_list_close(hb_list_t **_list)
{
- static int opencl_available = -1;
- if (opencl_available >= 0)
+ if (_list != NULL)
{
- return opencl_available;
+ hb_list_t *list = *_list;
+ hb_opencl_device_t *device;
+ while (list != NULL && hb_list_count(list) > 0)
+ {
+ if ((device = hb_list_item(list, 0)) != NULL)
+ {
+ hb_list_rem(list, device);
+ free(device);
+ }
+ }
}
- opencl_available = 0;
+ hb_list_close(_list);
+}
- cl_device_type type;
- char vendor[100], version[100];
- cl_device_id *device_ids = NULL;
- cl_platform_id *platform_ids = NULL;
- hb_opencl_library_t lib, *opencl = &lib;
+static hb_list_t* hb_opencl_devices_list_get(hb_opencl_library_t *opencl,
+ cl_device_type device_type)
+{
+ if (opencl == NULL ||
+ opencl->library == NULL ||
+ opencl->clGetDeviceIDs == NULL ||
+ opencl->clGetDeviceInfo == NULL ||
+ opencl->clGetPlatformIDs == NULL)
+ {
+ hb_error("hb_opencl_devices_list_get: OpenCL support not available");
+ return NULL;
+ }
+
+ hb_list_t *list = hb_list_init();
+ if (list == NULL)
+ {
+ hb_error("hb_opencl_devices_list_get: memory allocation failure");
+ return NULL;
+ }
+
+ cl_device_id *device_ids;
+ hb_opencl_device_t *device;
+ cl_platform_id *platform_ids;
cl_uint i, j, num_platforms, num_devices;
- /*
- * Check whether we can load the OpenCL library, then check devices and make
- * sure we support running OpenCL code on at least one of them.
- */
- if (hb_opencl_library_open(opencl) == 0)
+ if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms)
+ {
+ goto fail;
+ }
+ if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL)
+ {
+ hb_error("hb_opencl_devices_list_get: memory allocation failure");
+ goto fail;
+ }
+ if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS)
+ {
+ goto fail;
+ }
+ for (i = 0; i < num_platforms; i++)
{
- if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms)
+ if (opencl->clGetDeviceIDs(platform_ids[i], device_type, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices)
{
- goto end;
+ // non-fatal
+ continue;
}
- if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL)
+ if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL)
{
- goto end;
+ hb_error("hb_opencl_devices_list_get: memory allocation failure");
+ goto fail;
}
- if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS)
+ if (opencl->clGetDeviceIDs(platform_ids[i], device_type, num_devices, device_ids, NULL) != CL_SUCCESS)
{
- goto end;
+ // non-fatal
+ continue;
}
- for (i = 0; i < num_platforms; i++)
+ for (j = 0; j < num_devices; j++)
{
- if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices)
- {
- goto end;
- }
- if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL)
- {
- goto end;
- }
- if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, num_devices, device_ids, NULL) != CL_SUCCESS)
+ if ((device = hb_opencl_device_get(opencl, device_ids[j])) != NULL)
{
- goto end;
+ hb_list_add(list, device);
}
- for (j = 0; j < num_devices; j++)
+ }
+ }
+ return list;
+
+fail:
+ hb_opencl_devices_list_close(&list);
+ return NULL;
+}
+
+int hb_opencl_available()
+{
+ static int opencl_available = -1;
+ if (opencl_available >= 0)
+ {
+ return opencl_available;
+ }
+ opencl_available = 0;
+
+ /*
+ * Check whether we can load the OpenCL library, then check devices and make
+ * sure we support running OpenCL code on at least one of them.
+ */
+ hb_opencl_library_t *opencl;
+ if ((opencl = hb_opencl_library_init()) != NULL)
+ {
+ int i;
+ hb_list_t *device_list;
+ hb_opencl_device_t *device;
+ if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL)
+ {
+ for (i = 0; i < hb_list_count(device_list); i++)
{
- if (device_ids[j] != NULL)
+ if ((device = hb_list_item(device_list, i)) != NULL &&
+ (hb_opencl_device_is_supported(device)))
{
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VENDOR, sizeof(vendor),
- vendor, NULL);
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VERSION, sizeof(version),
- version, NULL);
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_TYPE, sizeof(type),
- &type, NULL);
-
- if (hb_opencl_device_is_supported(type,
- (const char*)vendor,
- (const char*)version))
- {
- opencl_available = 1;
- goto end;
- }
+ opencl_available = 1;
+ break;
}
}
- free(device_ids);
- device_ids = NULL;
+ hb_opencl_devices_list_close(&device_list);
}
+ hb_opencl_library_close(&opencl);
}
-
-end:
- free(device_ids);
- free(platform_ids);
- hb_opencl_library_close(opencl);
return opencl_available;
}
@@ -238,7 +370,7 @@ void hb_opencl_info_print()
* Its only purpose is to list OpenCL-capable devices, so let's initialize
* only what we absolutely need here, rather than calling library_open().
*/
- hb_opencl_library_t lib, *opencl = &lib;
+ hb_opencl_library_t ocl, *opencl = &ocl;
if ((opencl->library = (void*)HB_OCL_DLOPEN) == NULL ||
(opencl->clGetDeviceIDs = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceIDs" )) == NULL ||
(opencl->clGetDeviceInfo = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceInfo" )) == NULL ||
@@ -249,76 +381,35 @@ void hb_opencl_info_print()
goto end;
}
- cl_device_type type;
- cl_device_id *device_ids;
- cl_platform_id *platform_ids;
- cl_uint i, j, k, num_platforms, num_devices;
- char vendor[100], name[1024], version[100], driver[1024];
-
- if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms)
- {
- goto end;
- }
- if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL)
+ int i, idx;
+ hb_list_t *device_list;
+ hb_opencl_device_t *device;
+ if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL)
{
- goto end;
- }
- if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS)
- {
- goto end;
- }
- for (i = 0, k = 1; i < num_platforms; i++)
- {
- if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices)
- {
- goto end;
- }
- if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL)
- {
- goto end;
- }
- if (opencl->clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, num_devices, device_ids, NULL) != CL_SUCCESS)
- {
- goto end;
- }
- for (j = 0; j < num_devices; j++)
+ for (i = 0, idx = 1; i < hb_list_count(device_list); i++)
{
- if (device_ids[j] != NULL)
+ if ((device = hb_list_item(device_list, i)) != NULL)
{
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VENDOR, sizeof(vendor),
- vendor, NULL);
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_NAME, sizeof(name),
- name, NULL);
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_VERSION, sizeof(version),
- version, NULL);
- opencl->clGetDeviceInfo(device_ids[j], CL_DRIVER_VERSION, sizeof(driver),
- driver, NULL);
- opencl->clGetDeviceInfo(device_ids[j], CL_DEVICE_TYPE, sizeof(type),
- &type, NULL);
-
- // don't list unsupported devices
- if (type & CL_DEVICE_TYPE_CPU)
+ // don't list CPU devices (always unsupported)
+ if (!(device->type & CL_DEVICE_TYPE_CPU))
{
- continue;
+ hb_log("OpenCL device #%d: %s %s", idx++, device->vendor, device->name);
+ hb_log(" - OpenCL version: %s", device->version + 7 /* strlen("OpenCL ") */);
+ hb_log(" - driver version: %s", device->driver);
+ hb_log(" - device type: %s%s",
+ device->type & CL_DEVICE_TYPE_CPU ? "CPU" :
+ device->type & CL_DEVICE_TYPE_GPU ? "GPU" :
+ device->type & CL_DEVICE_TYPE_CUSTOM ? "Custom" :
+ device->type & CL_DEVICE_TYPE_ACCELERATOR ? "Accelerator" : "Unknown",
+ device->type & CL_DEVICE_TYPE_DEFAULT ? " (default)" : "");
+ hb_log(" - supported: %s",
+ hb_opencl_device_is_supported(device) ? "YES" : "no");
}
- hb_log("OpenCL device #%d: %s %s", k++, vendor, name);
- hb_log(" - OpenCL version: %s", version + 7 /* strlen("OpenCL ") */);
- hb_log(" - driver version: %s", driver);
- hb_log(" - device type: %s%s",
- type & CL_DEVICE_TYPE_CPU ? "CPU" :
- type & CL_DEVICE_TYPE_GPU ? "GPU" :
- type & CL_DEVICE_TYPE_CUSTOM ? "Custom" :
- type & CL_DEVICE_TYPE_ACCELERATOR ? "Accelerator" : "Unknown",
- type & CL_DEVICE_TYPE_DEFAULT ? " (default)" : "");
- hb_log(" - supported: %s",
- hb_opencl_device_is_supported(type,
- (const char*)vendor,
- (const char*)version) ? "yes" : "no");
}
}
- free(device_ids);
+ hb_opencl_devices_list_close(&device_list);
}
end:
- hb_opencl_library_close(opencl);
+ hb_opencl_library_close(&opencl);
}
diff --git a/libhb/opencl.h b/libhb/opencl.h
index 93171ebd0..2791e1618 100644
--- a/libhb/opencl.h
+++ b/libhb/opencl.h
@@ -11,6 +11,7 @@
#define HB_OPENCL_H
#include "extras/cl.h"
+#include "openclwrapper.h"
// we only support OpenCL 1.1 or later
#define HB_OCL_MINVERSION_MAJOR 1
@@ -654,15 +655,90 @@ typedef struct hb_opencl_library_s
HB_OCL_FUNC_DECL(clReleaseContext);
HB_OCL_FUNC_DECL(clReleaseEvent);
HB_OCL_FUNC_DECL(clReleaseKernel);
+ HB_OCL_FUNC_DECL(clReleaseMemObject);
HB_OCL_FUNC_DECL(clReleaseProgram);
HB_OCL_FUNC_DECL(clSetKernelArg);
HB_OCL_FUNC_DECL(clWaitForEvents);
} hb_opencl_library_t;
-int hb_opencl_library_open (hb_opencl_library_t *opencl);
-void hb_opencl_library_close(hb_opencl_library_t *opencl);
+hb_opencl_library_t* hb_opencl_library_init();
+void hb_opencl_library_close(hb_opencl_library_t **_opencl);
+
+/*
+ * Convenience pointer to a single shared OpenCL library wrapper.
+ *
+ * It can be initialized and closed via hb_ocl_init/close().
+ */
+extern hb_opencl_library_t *hb_ocl;
+int hb_ocl_init();
+void hb_ocl_close();
+
+typedef struct hb_opencl_device_s
+{
+ cl_platform_id platform;
+ cl_device_type type;
+ cl_device_id id;
+ char version[128];
+ char driver[128];
+ char vendor[128];
+ char name[128];
+ enum
+ {
+ HB_OCL_VENDOR_AMD,
+ HB_OCL_VENDOR_NVIDIA,
+ HB_OCL_VENDOR_OTHER,
+ } ocl_vendor;
+} hb_opencl_device_t;
int hb_opencl_available();
void hb_opencl_info_print();
+/* OpenCL scaling */
+typedef struct hb_oclscale_s
+{
+ int initialized;
+ // bicubic scale weights
+ cl_mem bicubic_x_weights;
+ cl_mem bicubic_y_weights;
+ cl_float xscale;
+ cl_float yscale;
+ int width;
+ int height;
+ // horizontal scaling and vertical scaling kernel handle
+ cl_kernel m_kernel;
+ int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem
+} hb_oclscale_t;
+
+int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop,
+ hb_oclscale_t *os);
+
+/* Utilities */
+#define HB_OCL_BUF_CREATE(ocl_lib, out, flags, size) \
+{ \
+ out = ocl_lib->clCreateBuffer(kenv->context, flags, size, NULL, &status); \
+ if (CL_SUCCESS != status) \
+ { \
+ return -1; \
+ } \
+}
+
+#define HB_OCL_BUF_FREE(ocl_lib, buf) \
+{ \
+ if (buf != NULL) \
+ { \
+ ocl_lib->clReleaseMemObject(buf); \
+ buf = NULL; \
+ } \
+}
+
+#define HB_OCL_CHECK(method, ...) \
+{ \
+ status = method(__VA_ARGS__); \
+ if (status != CL_SUCCESS) \
+ { \
+ hb_error("%s:%d (%s) error: %d\n",__FUNCTION__,__LINE__,#method,status);\
+ return status; \
+ } \
+}
+
#endif//HB_OPENCL_H
diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c
index b5faf7041..7fb395aba 100644
--- a/libhb/openclwrapper.c
+++ b/libhb/openclwrapper.c
@@ -10,12 +10,11 @@
Li Cao <[email protected]> <http://www.multicorewareinc.com/>
*/
-#ifdef USE_OPENCL
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "extras/cl.h"
+#include "opencl.h"
#include "openclwrapper.h"
#include "openclkernels.h"
@@ -80,80 +79,12 @@ static int isInited = 0;
static int useBuffers = 0;
static hb_kernel_node gKernels[MAX_KERNEL_NUM];
-#define ADD_KERNEL_CFG( idx, s, p ){\
- strcpy( gKernels[idx].kernelName, s );\
- gKernels[idx].kernelStr = p;\
- strcpy( gpu_env.kernel_names[idx], s );\
- gpu_env.kernel_count++; }
-
-
-/**
- * hb_confirm_gpu_type
- */
-int hb_confirm_gpu_type()
-{
- int status = 1;
- unsigned int i, j;
- cl_uint numPlatforms = 0;
- status = clGetPlatformIDs(0,NULL,&numPlatforms);
- if(status != 0)
- {
- goto end;
- }
- if(numPlatforms > 0)
- {
- cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id));
- status = clGetPlatformIDs (numPlatforms, platforms, NULL);
- if (status != 0)
- {
- goto end;
- }
- for (i=0; i < numPlatforms; i++)
- {
- char pbuff[100];
- cl_uint numDevices;
- status = clGetPlatformInfo( platforms[i],
- CL_PLATFORM_VENDOR,
- sizeof (pbuff),
- pbuff,
- NULL);
- if (status)
- continue;
- status = clGetDeviceIDs( platforms[i],
- CL_DEVICE_TYPE_GPU ,
- 0 ,
- NULL ,
- &numDevices);
-
- cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));
- status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
- for (j = 0; j < numDevices; j++)
- {
- char dbuff[100];
- status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL);
- if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") ||
- !strcmp(dbuff, "Intel(R) Corporation") ||
-#ifdef __APPLE__
- !strcmp(dbuff, "AMD") ||
- /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */
- !strcmp(dbuff, "NVIDIA") ||
- /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */
-#endif
- !strcmp(dbuff, "NVIDIA Corporation"))
- {
- return 0;
- }
- }
-
- if ( status != CL_SUCCESS )
- continue;
- if( numDevices )
- break;
- }
- free( platforms );
- }
- end:
- return -1;
+#define HB_OCL_ADD_KERNEL_CFG(idx, s, p) \
+{ \
+ strcpy(gKernels[idx].kernelName, s); \
+ gKernels[idx].kernelStr = p; \
+ strcpy(gpu_env.kernel_names[idx], s); \
+ gpu_env.kernel_count++; \
}
/**
@@ -168,8 +99,8 @@ int hb_regist_opencl_kernel()
gpu_env.file_count = 0; //argc;
gpu_env.kernel_count = 0UL;
- ADD_KERNEL_CFG( 0, "frame_scale", NULL )
- ADD_KERNEL_CFG( 1, "yadif_filter", NULL )
+ HB_OCL_ADD_KERNEL_CFG(0, "frame_scale", NULL);
+ HB_OCL_ADD_KERNEL_CFG(1, "yadif_filter", NULL);
return 0;
}
@@ -230,11 +161,14 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE **
char * str = NULL;
FILE * fd = NULL;
- status = clGetContextInfo( context,
- CL_CONTEXT_NUM_DEVICES,
- sizeof(numDevices),
- &numDevices,
- NULL );
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_binary_generated: OpenCL support not available");
+ return 0;
+ }
+
+ status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
+ sizeof(numDevices), &numDevices, NULL);
if( status != CL_SUCCESS )
{
hb_log( "OpenCL: Get context info failed" );
@@ -249,11 +183,9 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE **
}
/* grab the handles to all of the devices in the context. */
- status = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices,
- NULL );
+ status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices, NULL);
status = 0;
/* dump out each binary into its own separate file. */
@@ -264,11 +196,8 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE **
if (devices[i])
{
char deviceName[1024];
- status = clGetDeviceInfo(devices[i],
- CL_DEVICE_NAME,
- sizeof(deviceName),
- deviceName,
- NULL);
+ status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
+ sizeof(deviceName), deviceName, NULL);
str = (char*)strstr(cl_file_name, ".cl");
memcpy(cl_name, cl_file_name, str - cl_file_name);
@@ -325,11 +254,14 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
char **binaries;
char *str = NULL;
- status = clGetProgramInfo( program,
- CL_PROGRAM_NUM_DEVICES,
- sizeof(numDevices),
- &numDevices,
- NULL );
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_generat_bin_from_kernel_source: OpenCL support not available");
+ return 0;
+ }
+
+ status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
+ sizeof(numDevices), &numDevices, NULL);
if( status != CL_SUCCESS )
{
hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
@@ -344,11 +276,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
}
/* grab the handles to all of the devices in the program. */
- status = clGetProgramInfo( program,
- CL_PROGRAM_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices,
- NULL );
+ status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices, NULL);
if( status != CL_SUCCESS )
{
hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
@@ -358,10 +288,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
/* figure out the sizes of each of the binaries. */
binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
- status = clGetProgramInfo( program,
- CL_PROGRAM_BINARY_SIZES,
- sizeof(size_t) * numDevices,
- binarySizes, NULL );
+ status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t) * numDevices,
+ binarySizes, NULL);
if( status != CL_SUCCESS )
{
hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
@@ -393,11 +322,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
}
}
- status = clGetProgramInfo( program,
- CL_PROGRAM_BINARIES,
- sizeof(char *) * numDevices,
- binaries,
- NULL );
+ status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARIES,
+ sizeof(char *) * numDevices,
+ binaries, NULL);
if( status != CL_SUCCESS )
{
hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
@@ -412,11 +339,9 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
if (binarySizes[i])
{
char deviceName[1024];
- status = clGetDeviceInfo(devices[i],
- CL_DEVICE_NAME,
- sizeof(deviceName),
- deviceName,
- NULL);
+ status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
+ sizeof(deviceName), deviceName,
+ NULL);
str = (char*)strstr( cl_file_name, (char*)".cl" );
memcpy(cl_name, cl_file_name, str - cl_file_name);
@@ -489,8 +414,15 @@ int hb_init_opencl_attr( OpenCLEnv * env )
int hb_create_kernel( char * kernelname, KernelEnv * env )
{
int status;
- env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status );
- env->context = gpu_env.context;
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_create_kernel: OpenCL support not available");
+ return 0;
+ }
+
+ env->kernel = hb_ocl->clCreateKernel(gpu_env.programs[0], kernelname, &status);
+ env->context = gpu_env.context;
env->command_queue = gpu_env.command_queue;
return status != CL_SUCCESS ? 1 : 0;
}
@@ -501,7 +433,13 @@ int hb_create_kernel( char * kernelname, KernelEnv * env )
*/
int hb_release_kernel( KernelEnv * env )
{
- int status = clReleaseKernel( env->kernel );
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_release_kernel: OpenCL support not available");
+ return 0;
+ }
+
+ int status = hb_ocl->clReleaseKernel(env->kernel);
return status != CL_SUCCESS ? 1 : 0;
}
@@ -522,17 +460,23 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
unsigned int i;
void *handle = INVALID_HANDLE_VALUE;
-
if (init_once != 0)
return 0;
else
init_once = 1;
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_init_opencl_env: OpenCL support not available");
+ return 1;
+ }
+
/*
* Have a look at the available platforms.
*/
if( !gpu_info->isUserCreated )
{
- status = clGetPlatformIDs( 0, NULL, &numPlatforms );
+ status = hb_ocl->clGetPlatformIDs(0, NULL, &numPlatforms);
if( status != CL_SUCCESS )
{
hb_log( "OpenCL: OpenCL device platform not found." );
@@ -548,7 +492,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
{
return(1);
}
- status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+ status = hb_ocl->clGetPlatformIDs(numPlatforms, platforms, NULL);
if( status != CL_SUCCESS )
{
@@ -558,9 +502,8 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
for( i = 0; i < numPlatforms; i++ )
{
- status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
- sizeof(platformName), platformName,
- NULL );
+ status = hb_ocl->clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
+ sizeof(platformName), platformName, NULL);
if( status != CL_SUCCESS )
{
@@ -576,11 +519,10 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
gpu_info->platform = platforms[i];
- status = clGetDeviceIDs( gpu_info->platform /* platform */,
- CL_DEVICE_TYPE_GPU /* device_type */,
- 0 /* num_entries */,
- NULL /* devices */,
- &numDevices );
+ status = hb_ocl->clGetDeviceIDs(gpu_info->platform /* platform */,
+ CL_DEVICE_TYPE_GPU /* device_type */,
+ 0 /* num_entries */,
+ NULL /* devices */, &numDevices);
if( status != CL_SUCCESS )
{
@@ -614,21 +556,21 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
cps[2] = 0;
/* Check for GPU. */
gpu_info->dType = CL_DEVICE_TYPE_GPU;
- gpu_info->context = clCreateContextFromType(
- cps, gpu_info->dType, NULL, NULL, &status );
+ gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
+ NULL, NULL, &status);
if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
{
gpu_info->dType = CL_DEVICE_TYPE_CPU;
- gpu_info->context = clCreateContextFromType(
- cps, gpu_info->dType, NULL, NULL, &status );
+ gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
+ NULL, NULL, &status);
}
if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
{
gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
- gpu_info->context = clCreateContextFromType(
- cps, gpu_info->dType, NULL, NULL, &status );
+ gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
+ NULL, NULL, &status);
}
if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
@@ -639,8 +581,8 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
/* Detect OpenCL devices. */
/* First, get the size of device list data */
- status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES,
- 0, NULL, &length );
+ status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
+ 0, NULL, &length);
if((status != CL_SUCCESS) || (length == 0))
{
hb_log( "OpenCL: Unable to get the list of devices in context." );
@@ -655,8 +597,8 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
}
/* Now, get the device list data */
- status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length,
- gpu_info->devices, NULL );
+ status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
+ length, gpu_info->devices, NULL);
if( status != CL_SUCCESS )
{
hb_log( "OpenCL: Unable to get the device list data in context." );
@@ -664,9 +606,9 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
}
/* Create OpenCL command queue. */
- gpu_info->command_queue = clCreateCommandQueue( gpu_info->context,
- gpu_info->devices[0],
- 0, &status );
+ gpu_info->command_queue = hb_ocl->clCreateCommandQueue(gpu_info->context,
+ gpu_info->devices[0],
+ 0, &status);
if( status != CL_SUCCESS )
{
hb_log( "OpenCL: Unable to create opencl command queue." );
@@ -674,9 +616,10 @@ int hb_init_opencl_env( GPUEnv *gpu_info )
}
}
- if( clGetCommandQueueInfo( gpu_info->command_queue,
- CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle),
- &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE )
+ if ((CL_SUCCESS == hb_ocl->clGetCommandQueueInfo(gpu_info->command_queue,
+ CL_QUEUE_THREAD_HANDLE_AMD,
+ sizeof(handle), &handle, NULL)) &&
+ (INVALID_HANDLE_VALUE != handle))
{
#ifdef SYS_MINGW
SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );
@@ -697,29 +640,36 @@ int hb_release_opencl_env( GPUEnv *gpu_info )
return 1;
int i;
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_release_opencl_env: OpenCL support not available");
+ return 0;
+ }
+
for( i = 0; i<gpu_env.file_count; i++ )
{
if( gpu_env.programs[i] ) ;
{
- clReleaseProgram( gpu_env.programs[i] );
+ hb_ocl->clReleaseProgram(gpu_env.programs[i]);
gpu_env.programs[i] = NULL;
}
}
if( gpu_env.command_queue )
{
- clReleaseCommandQueue( gpu_env.command_queue );
+ hb_ocl->clReleaseCommandQueue(gpu_env.command_queue);
gpu_env.command_queue = NULL;
}
if( gpu_env.context )
{
- clReleaseContext( gpu_env.context );
+ hb_ocl->clReleaseContext(gpu_env.context);
gpu_env.context = NULL;
}
isInited = 0;
gpu_info->isUserCreated = 0;
+
return 1;
}
@@ -813,13 +763,16 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
source = source_str;
source_size[0] = strlen( source );
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_compile_kernel_file: OpenCL support not available");
+ return 0;
+ }
+
if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1)
{
- status = clGetContextInfo(gpu_info->context,
- CL_CONTEXT_NUM_DEVICES,
- sizeof(numDevices),
- &numDevices,
- NULL);
+ status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
+ sizeof(numDevices), &numDevices, NULL);
if (status != CL_SUCCESS)
{
hb_log("OpenCL: Unable to get the number of devices in context.");
@@ -852,19 +805,17 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
return 0;
/* grab the handles to all of the devices in the context. */
- status = clGetContextInfo(gpu_info->context,
- CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices,
- NULL);
-
- gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
- numDevices,
- devices,
- &length,
- (const unsigned char**)&binary,
- &binary_status,
- &status);
+ status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices, NULL);
+
+ gpu_info->programs[idx] = hb_ocl->clCreateProgramWithBinary(gpu_info->context,
+ numDevices,
+ devices,
+ &length,
+ (const unsigned char**)&binary,
+ &binary_status,
+ &status);
fclose(fd);
free(devices);
@@ -874,8 +825,9 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
else
{
/* create a CL program using the kernel source */
- gpu_info->programs[idx] = clCreateProgramWithSource(
- gpu_info->context, 1, &source, source_size, &status );
+ gpu_info->programs[idx] = hb_ocl->clCreateProgramWithSource(gpu_info->context, 1,
+ &source, source_size,
+ &status);
}
if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
@@ -886,28 +838,30 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
/* create a cl program executable for all the devices specified */
if( !gpu_info->isUserCreated )
{
- status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices,
- build_option, NULL, NULL );
+ status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
+ build_option, NULL, NULL);
}
else
{
- status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev),
- build_option, NULL, NULL );
+ status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
+ build_option, NULL, NULL);
}
if( status != CL_SUCCESS )
{
if( !gpu_info->isUserCreated )
{
- status = clGetProgramBuildInfo( gpu_info->programs[idx],
- gpu_info->devices[0],
- CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
+ gpu_info->devices[0],
+ CL_PROGRAM_BUILD_LOG,
+ 0, NULL, &length);
}
else
{
- status = clGetProgramBuildInfo( gpu_info->programs[idx],
- gpu_info->dev,
- CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
+ gpu_info->dev,
+ CL_PROGRAM_BUILD_LOG,
+ 0, NULL, &length);
}
if( status != CL_SUCCESS )
@@ -924,13 +878,17 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
if( !gpu_info->isUserCreated )
{
- status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0],
- CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+ status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
+ gpu_info->devices[0],
+ CL_PROGRAM_BUILD_LOG,
+ length, buildLog, &length);
}
else
{
- status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev,
- CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+ status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
+ gpu_info->dev,
+ CL_PROGRAM_BUILD_LOG,
+ length, buildLog, &length);
}
fd1 = fopen( "kernel-build.log", "w+" );
@@ -1083,7 +1041,14 @@ int hb_get_opencl_env()
int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )
{
int status;
- *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_create_buffer: OpenCL support not available");
+ return 0;
+ }
+
+ *cl_Buf = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
if( status != CL_SUCCESS )
{
@@ -1105,7 +1070,14 @@ int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
{
int status;
- status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 );
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_read_opencl_suffer: OpenCL support not available");
+ return 0;
+ }
+
+ status = hb_ocl->clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf,
+ CL_TRUE, 0, size, outbuf, 0, 0, 0);
if( status != CL_SUCCESS )
{
hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );
@@ -1119,9 +1091,18 @@ int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size)
{
int status;
int flags = CL_MEM_ALLOC_HOST_PTR;
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_cl_create_mapped_buffer: OpenCL support not available");
+ return 0;
+ }
+
//cl_event event;
- *mem = clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
- *addr = clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL/*&event*/, &status);
+ *mem = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
+ *addr = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE,
+ CL_MAP_READ|CL_MAP_WRITE, 0, size, 0,
+ NULL, NULL/*&event*/, &status);
//hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status);
@@ -1131,9 +1112,17 @@ int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size)
int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr)
{
cl_event event;
- int status = clEnqueueUnmapMemObject(gpu_env.command_queue, mem, addr, 0, NULL, &event);
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_cl_free_mapped_buffer: OpenCL support not available");
+ return 0;
+ }
+
+ int status = hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, mem,
+ addr, 0, NULL, &event);
if (status == CL_SUCCESS)
- clWaitForEvents(1, &event);
+ hb_ocl->clWaitForEvents(1, &event);
else
hb_log("hb_free_mapped_buffer: error %d", status);
return (status == CL_SUCCESS) ? 1 : 0;
@@ -1151,11 +1140,16 @@ int hb_use_buffers()
int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)
{
- int status = clEnqueueCopyBuffer(gpu_env.command_queue,
- src_buffer,
- dst_buffer,
- src_offset, dst_offset, cb,
- 0, 0, 0);
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_copy_buffer: OpenCL support not available");
+ return 0;
+ }
+
+ int status = hb_ocl->clEnqueueCopyBuffer(gpu_env.command_queue,
+ src_buffer, dst_buffer,
+ src_offset, dst_offset,
+ cb, 0, 0, 0);
if( status != CL_SUCCESS )
{
av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status );
@@ -1184,13 +1178,23 @@ int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned cha
int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)
{
int status;
- void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0 + linesize1)*height + offset, 0, NULL, NULL, NULL );
+
+ if (hb_ocl == NULL)
+ {
+ hb_error("hb_write_opencl_frame_buffer: OpenCL support not available");
+ return 0;
+ }
+
+ void *mapped = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf,
+ CL_TRUE,CL_MAP_WRITE, 0,
+ sizeof(uint8_t) * (linesize0 + linesize1) * height + offset,
+ 0, NULL, NULL, NULL);
uint8_t *temp = (uint8_t *)mapped;
temp += offset;
memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);
memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);
memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);
- clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL );
+ hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL);
return 1;
}
@@ -1203,4 +1207,3 @@ cl_context hb_get_context()
{
return gpu_env.context;
}
-#endif
diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h
index c7606afc0..0ccabd564 100644
--- a/libhb/openclwrapper.h
+++ b/libhb/openclwrapper.h
@@ -11,10 +11,11 @@
*/
-#ifndef __OPENCL_WRAPPER_H
-#define __OPENCL_WRAPPER_H
-#ifdef USE_OPENCL
+#ifndef HB_OPENCL_WRAPPER_H
+#define HB_OPENCL_WRAPPER_H
+
#include "common.h"
+#include "extras/cl.h"
//support AMD opencl
#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
@@ -85,5 +86,5 @@ int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr);
int hb_use_buffers();
int hb_confirm_gpu_type();
-#endif
-#endif
+
+#endif // HB_OPENCL_WRAPPER_H
diff --git a/libhb/scan.c b/libhb/scan.c
index 535baea39..f827245e1 100644
--- a/libhb/scan.c
+++ b/libhb/scan.c
@@ -8,6 +8,7 @@
*/
#include "hb.h"
+#include "opencl.h"
#include "hbffmpeg.h"
#include "a52dec/a52.h"
@@ -868,6 +869,9 @@ skip_preview:
title->video_decode_support = vid_info.video_decode_support;
+ // TODO: check video dimensions
+ title->opencl_support = !!hb_opencl_available();
+
// compute the aspect ratio based on the storage dimensions and the
// pixel aspect ratio (if supplied) or just storage dimensions if no PAR.
title->aspect = (double)title->width / (double)title->height;
diff --git a/libhb/stream.c b/libhb/stream.c
index ac34cc550..5d6ec7f4c 100644
--- a/libhb/stream.c
+++ b/libhb/stream.c
@@ -1121,14 +1121,7 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title)
#else
title->hwd_support = 0;
#endif
-#ifdef USE_OPENCL
- if (hb_confirm_gpu_type() == 0 && hb_opencl_available() == 1)
- title->opencl_support = 1;
- else
- title->opencl_support = 0;
-#else
- title->opencl_support = 0;
-#endif
+
// Height, width, rate and aspect ratio information is filled in
// when the previews are built
return title;
@@ -5687,14 +5680,6 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title )
#else
title->hwd_support = 0;
#endif
-#ifdef USE_OPENCL
- if (hb_confirm_gpu_type() == 0 && hb_opencl_available() == 1)
- title->opencl_support = 1;
- else
- title->opencl_support = 0;
-#else
- title->opencl_support = 0;
-#endif
return title;
}
diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c
index 98ff41dcc..d173d4a08 100644
--- a/libhb/vadxva2.c
+++ b/libhb/vadxva2.c
@@ -10,14 +10,12 @@
Li Cao <[email protected]> <http://www.multicorewareinc.com/>
*/
-#include "vadxva2.h"
-#ifdef USE_OPENCL
+#ifdef USE_HWD
+
+#include "vadxva2.h"
#include "extras/cl.h"
#include "oclnv12toyuv.h"
-#endif
-
-#ifdef USE_HWD
static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height );
static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame );
@@ -112,12 +110,11 @@ void hb_va_close( hb_va_dxva2_t *dxva2 )
if( dxva2->hd3d9_dll )
FreeLibrary( dxva2->hd3d9_dll );
-#ifdef USE_OPENCL
if ( dxva2->nv12toyuv_tmp_in )
free( dxva2->nv12toyuv_tmp_in );
if ( dxva2->nv12toyuv_tmp_out )
free( dxva2->nv12toyuv_tmp_out );
-#endif
+
dxva2->description = NULL;
free( dxva2 );
}
@@ -775,4 +772,5 @@ int hb_check_hwd_fmt( int fmt )
}
return result;
}
-#endif
+
+#endif // USE_HWD
diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h
index cd879b974..39165e188 100644
--- a/libhb/vadxva2.h
+++ b/libhb/vadxva2.h
@@ -11,15 +11,17 @@
*/
-#ifndef VA_DXVA2_H
-#define VA_DXVA2_H
-
#ifdef USE_HWD
+
+#ifndef HB_VA_DXVA2_H
+#define HB_VA_DXVA2_H
+
#include "hbffmpeg.h"
#include "d3d9.h"
#include "libavcodec/dxva2.h"
#include "dxva2api.h"
#include "common.h"
+#include "opencl.h"
#include "openclwrapper.h"
#define HB_FOURCC( a, b, c, d ) ( ((uint32_t)a) | ( ((uint32_t)b) << 8 ) | ( ((uint32_t)c) << 16 ) | ( ((uint32_t)d) << 24 ) )
@@ -133,21 +135,18 @@ typedef struct
int do_job;
// running nv12toyuv kernel.
-#ifdef USE_OPENCL
cl_kernel nv12toyuv;
cl_mem cl_mem_nv12;
cl_mem cl_mem_yuv;
uint8_t * nv12toyuv_tmp_in;
uint8_t * nv12toyuv_tmp_out;
-#endif
} hb_va_dxva2_t;
typedef struct FilterLink_T
{
-#ifdef USE_OPENCL
cl_mem cl_inbuf;
cl_mem cl_outbuf;
-#endif
+
uint8_t *mem_inbuf;
uint8_t *mem_outbuf;
int width;
@@ -209,5 +208,7 @@ void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context );
void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame );
void hb_va_close( hb_va_dxva2_t *dxva2 );
int hb_check_hwd_fmt( int fmt );
-#endif
-#endif
+
+#endif // HB_VA_DXVA2_H
+
+#endif // USE_HWD
diff --git a/libhb/work.c b/libhb/work.c
index b63925359..43dbdeaaa 100644
--- a/libhb/work.c
+++ b/libhb/work.c
@@ -11,6 +11,7 @@
#include "a52dec/a52.h"
#include "libavformat/avformat.h"
#include "openclwrapper.h"
+#include "opencl.h"
#ifdef USE_QSV
#include "qsv_common.h"
@@ -539,13 +540,13 @@ static void do_job(hb_job_t *job)
job->list_work = hb_list_init();
-#ifdef USE_OPENCL
- /* init opencl environment */
- if (job->use_opencl)
- job->use_opencl = !hb_init_opencl_run_env(0, NULL, "-I.");
-#else
- job->use_opencl = 0;
-#endif
+ /* OpenCL */
+ if (job->use_opencl && (hb_ocl_init() || hb_init_opencl_run_env(0, NULL, "-I.")))
+ {
+ hb_log("work: failed to initialize OpenCL environment, using fallback");
+ job->use_opencl = 0;
+ hb_ocl_close();
+ }
hb_log( "starting job" );
@@ -856,9 +857,10 @@ static void do_job(hb_job_t *job)
init.pix_fmt = AV_PIX_FMT_YUV420P;
init.width = title->width;
init.height = title->height;
-#ifdef USE_OPENCL
- init.use_dxva = hb_use_dxva( title );
-#endif
+
+ /* DXVA2 */
+ init.use_dxva = hb_use_dxva(title);
+
init.par_width = job->anamorphic.par_width;
init.par_height = job->anamorphic.par_height;
memcpy(init.crop, title->crop, sizeof(int[4]));
@@ -1615,6 +1617,12 @@ cleanup:
hb_buffer_pool_free();
hb_job_close( &job );
+
+ /* OpenCL: must be closed *after* freeing the buffer pool */
+ if (job->use_opencl)
+ {
+ hb_ocl_close();
+ }
}
static inline void copy_chapter( hb_buffer_t * dst, hb_buffer_t * src )