summaryrefslogtreecommitdiffstats
path: root/libhb/openclwrapper.c
diff options
context:
space:
mode:
authorsr55 <[email protected]>2013-09-21 20:16:51 +0000
committersr55 <[email protected]>2013-09-21 20:16:51 +0000
commitf69b7f1dfc98c90d454078f1a3aabef3bae36fd2 (patch)
tree4792942c10f6b1c4418228bcc53b648b773d5098 /libhb/openclwrapper.c
parent8bccfabca28d059978f1eb8e516592f4e2f06c1a (diff)
Merging-in the OpenCL Scaling code from the OpenCL branch to trunk.
Patch originally by the Multicoreware Inc team, followed by improvements and fixes by Micheal Wootton from AMD Inc, OpenCL: This patch implements Bicubic Scaling in OpenCL. Note that HandBrake currently uses Lanczos so the performance difference appears to be much more significant. We may offer an option of BiCubic in software later. Bicubic scaling may appear a bit sharper than the equivalent Lanczos encode and may increase file size a bit. Quality may be better or worse depending on the scaling and content and personal preference towards sharpness. When comparing performance with a custom HandBrake build that runs Software Bicubic to OpenCL Bicubic, performance increase is about 5~7% on average on a modern GPU. Hardware Decode via DXVA: We also have optional DXVA decoding which may come in useful for slower/lower end systems that have a capable GPU. This is only available on input sources that use the libav decode path. Most GPU hardware for decoding is designed for playback, so if you are running on a high end CPU, it will bottleneck the encode process. Requires OpenCL 1.1 or later supporting GPU. Front end changes and testing framework are not included in this patch. This will be resolved later. Patch will be revised further before the UI is implemented. git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5792 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb/openclwrapper.c')
-rw-r--r--libhb/openclwrapper.c1261
1 files changed, 1261 insertions, 0 deletions
diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c
new file mode 100644
index 000000000..9a7e9888d
--- /dev/null
+++ b/libhb/openclwrapper.c
@@ -0,0 +1,1261 @@
+/* openclwrapper.c
+
+ Copyright (c) 2003-2012 HandBrake Team
+ This file is part of the HandBrake source code
+ Homepage: <http://handbrake.fr/>.
+ It may be used under the terms of the GNU General Public License v2.
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
+
+ Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
+ Li Cao <[email protected]> <http://www.multicorewareinc.com/>
+ */
+
+#ifdef USE_OPENCL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "openclwrapper.h"
+#include "openclkernels.h"
+
+//#define USE_EXTERNAL_KERNEL
+#ifdef SYS_MINGW
+#include <windows.h>
+#endif
+
+#if defined(__APPLE__)
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#if defined(_MSC_VER)
+#define strcasecmp strcmpi
+#endif
+
+#define MAX_KERNEL_STRING_LEN 64
+#define MAX_CLFILE_NUM 50
+#define MAX_CLKERNEL_NUM 200
+#define MAX_CLFILE_PATH 255
+#define MAX_KERNEL_NUM 50
+#define MAX_KERNEL_NAME_LEN 64
+
+#ifndef INVALID_HANDLE_VALUE
+#define INVALID_HANDLE_VALUE NULL
+#endif
+
+//#define THREAD_PRIORITY_TIME_CRITICAL 15
+
+enum VENDOR
+{
+ AMD = 0,
+ Intel,
+ NVIDIA,
+ others
+};
+typedef struct _GPUEnv
+{
+ //share vb in all modules in hb library
+ cl_platform_id platform;
+ cl_device_type dType;
+ cl_context context;
+ cl_device_id * devices;
+ cl_device_id dev;
+ cl_command_queue command_queue;
+ cl_kernel kernels[MAX_CLFILE_NUM];
+ cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
+ char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256
+ int file_count; // only one kernel file
+
+ char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
+ cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
+ int kernel_count;
+ int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
+ enum VENDOR vendor;
+}GPUEnv;
+
+typedef struct
+{
+ char kernelName[MAX_KERNEL_NAME_LEN+1];
+ char * kernelStr;
+}hb_kernel_node;
+
+static GPUEnv gpu_env;
+static int isInited = 0;
+static int useBuffers = 0;
+static hb_kernel_node gKernels[MAX_KERNEL_NUM];
+
+#define ADD_KERNEL_CFG( idx, s, p ){\
+ strcpy( gKernels[idx].kernelName, s );\
+ gKernels[idx].kernelStr = p;\
+ strcpy( gpu_env.kernel_names[idx], s );\
+ gpu_env.kernel_count++; }
+
+
+/**
+ * hb_confirm_gpu_type
+ */
+int hb_confirm_gpu_type()
+{
+ int status = 1;
+ unsigned int i, j;
+ cl_uint numPlatforms = 0;
+ status = clGetPlatformIDs(0,NULL,&numPlatforms);
+ if(status != 0)
+ {
+ goto end;
+ }
+ if(numPlatforms > 0)
+ {
+ cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id));
+ status = clGetPlatformIDs (numPlatforms, platforms, NULL);
+ if (status != 0)
+ {
+ goto end;
+ }
+ for (i=0; i < numPlatforms; i++)
+ {
+ char pbuff[100];
+ cl_uint numDevices;
+ status = clGetPlatformInfo( platforms[i],
+ CL_PLATFORM_VENDOR,
+ sizeof (pbuff),
+ pbuff,
+ NULL);
+ if (status)
+ continue;
+ status = clGetDeviceIDs( platforms[i],
+ CL_DEVICE_TYPE_GPU ,
+ 0 ,
+ NULL ,
+ &numDevices);
+
+ cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));
+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
+ for (j = 0; j < numDevices; j++)
+ {
+ char dbuff[100];
+ status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL);
+ if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") ||
+ !strcmp(dbuff, "Intel(R) Corporation") ||
+#ifdef __APPLE__
+ !strcmp(dbuff, "AMD") ||
+ /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */
+ !strcmp(dbuff, "NVIDIA") ||
+ /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */
+#endif
+ !strcmp(dbuff, "NVIDIA Corporation"))
+ {
+ return 0;
+ }
+ }
+
+ if ( status != CL_SUCCESS )
+ continue;
+ if( numDevices )
+ break;
+ }
+ free( platforms );
+ }
+ end:
+ return -1;
+}
+
+/**
+ * hb_regist_opencl_kernel
+ */
+int hb_regist_opencl_kernel()
+{
+ //if( !gpu_env.isUserCreated )
+ // memset( &gpu_env, 0, sizeof(gpu_env) );
+ //Comment for posterity: When in doubt just zero out a structure full of pointers to allocated resources.
+
+ gpu_env.file_count = 0; //argc;
+ gpu_env.kernel_count = 0UL;
+
+ ADD_KERNEL_CFG( 0, "frame_scale", NULL )
+ ADD_KERNEL_CFG( 1, "yadif_filter", NULL )
+
+ return 0;
+}
+
+/**
+ * hb_regist_opencl_kernel
+ * @param filename -
+ * @param source -
+ * @param gpu_info -
+ * @param int idx -
+ */
+int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )
+{
+ int file_size;
+ size_t result;
+ FILE * file = NULL;
+ file_size = 0;
+ result = 0;
+ file = fopen( filename, "rb+" );
+
+ if( file!=NULL )
+ {
+ fseek( file, 0, SEEK_END );
+
+ file_size = ftell( file );
+ rewind( file );
+ *source = (char*)malloc( sizeof(char) * file_size + 1 );
+ if( *source == (char*)NULL )
+ {
+ return(0);
+ }
+ result = fread( *source, 1, file_size, file );
+ if( result != file_size )
+ {
+ free( *source );
+ return(0);
+ }
+ (*source)[file_size] = '\0';
+ fclose( file );
+
+ return(1);
+ }
+ return(0);
+}
+
+/**
+ * hb_binary_generated
+ * @param context -
+ * @param cl_file_name -
+ * @param fhandle -
+ */
+int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )
+{
+ int i = 0;
+ cl_int status;
+ cl_uint numDevices;
+ cl_device_id *devices;
+ char * str = NULL;
+ FILE * fd = NULL;
+
+ status = clGetContextInfo( context,
+ CL_CONTEXT_NUM_DEVICES,
+ sizeof(numDevices),
+ &numDevices,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Get context info failed" );
+ return 0;
+ }
+
+ devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
+ if( devices == NULL )
+ {
+ hb_log( "OpenCL: No device found" );
+ return 0;
+ }
+
+ /* grab the handles to all of the devices in the context. */
+ status = clGetContextInfo( context,
+ CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices,
+ NULL );
+
+ status = 0;
+ /* dump out each binary into its own separate file. */
+ for (i = 0; i < numDevices; i++)
+ {
+ char fileName[256] = { 0 };
+ char cl_name[128] = { 0 };
+ if (devices[i])
+ {
+ char deviceName[1024];
+ status = clGetDeviceInfo(devices[i],
+ CL_DEVICE_NAME,
+ sizeof(deviceName),
+ deviceName,
+ NULL);
+
+ str = (char*)strstr(cl_file_name, ".cl");
+ memcpy(cl_name, cl_file_name, str - cl_file_name);
+ cl_name[str - cl_file_name] = '\0';
+ sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
+ fd = fopen(fileName, "rb");
+ status = fd != NULL;
+ }
+ }
+
+ if( devices != NULL )
+ {
+ free( devices );
+ devices = NULL;
+ }
+
+ if( fd != NULL )
+ *fhandle = fd;
+
+ return status;
+}
+
+/**
+ * hb_write_binary_to_file
+ * @param fileName -
+ * @param birary -
+ * @param numBytes -
+ */
+int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )
+{
+ FILE *output = NULL;
+ output = fopen( fileName, "wb" );
+ if( output == NULL )
+ return 0;
+
+ fwrite( birary, sizeof(char), numBytes, output );
+ fclose( output );
+
+ return 1;
+}
+
+/**
+ * hb_generat_bin_from_kernel_source
+ * @param program -
+ * @param cl_file_name -
+ */
+int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )
+{
+ int i = 0;
+ cl_int status;
+ cl_uint numDevices;
+ size_t *binarySizes;
+ cl_device_id *devices;
+ char **binaries;
+ char *str = NULL;
+
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_NUM_DEVICES,
+ sizeof(numDevices),
+ &numDevices,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
+ return 0;
+ }
+
+ devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
+ if( devices == NULL )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");
+ return 0;
+ }
+
+ /* grab the handles to all of the devices in the program. */
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
+ return 0;
+ }
+
+ /* figure out the sizes of each of the binaries. */
+ binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
+
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t) * numDevices,
+ binarySizes, NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
+ return 0;
+ }
+
+ /* copy over all of the generated binaries. */
+ binaries = (char**)malloc( sizeof(char *) * numDevices );
+ if( binaries == NULL )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");
+ return 0;
+ }
+
+ for( i = 0; i < numDevices; i++ )
+ {
+ if( binarySizes[i] != 0 )
+ {
+ binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );
+ if( binaries[i] == NULL )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i);
+ return 0;
+ }
+ }
+ else
+ {
+ binaries[i] = NULL;
+ }
+ }
+
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_BINARIES,
+ sizeof(char *) * numDevices,
+ binaries,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
+ return 0;
+ }
+
+ /* dump out each binary into its own separate file. */
+ for (i = 0; i < numDevices; i++)
+ {
+ char fileName[256] = {0};
+ char cl_name[128] = {0};
+ if (binarySizes[i])
+ {
+ char deviceName[1024];
+ status = clGetDeviceInfo(devices[i],
+ CL_DEVICE_NAME,
+ sizeof(deviceName),
+ deviceName,
+ NULL);
+
+ str = (char*)strstr( cl_file_name, (char*)".cl" );
+ memcpy(cl_name, cl_file_name, str - cl_file_name);
+ cl_name[str - cl_file_name] = '\0';
+ sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
+
+ if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i]))
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead.");
+ return 0;
+ }
+ }
+ }
+
+ // Release all resouces and memory
+ for( i = 0; i < numDevices; i++ )
+ {
+ if( binaries[i] != NULL )
+ {
+ free( binaries[i] );
+ binaries[i] = NULL;
+ }
+ }
+
+ if( binaries != NULL )
+ {
+ free( binaries );
+ binaries = NULL;
+ }
+
+ if( binarySizes != NULL )
+ {
+ free( binarySizes );
+ binarySizes = NULL;
+ }
+
+ if( devices != NULL )
+ {
+ free( devices );
+ devices = NULL;
+ }
+ return 1;
+}
+
+
+/**
+ * hb_init_opencl_attr
+ * @param env -
+ */
+int hb_init_opencl_attr( OpenCLEnv * env )
+{
+ if( gpu_env.isUserCreated )
+ return 1;
+
+ gpu_env.context = env->context;
+ gpu_env.platform = env->platform;
+ gpu_env.dev = env->devices;
+ gpu_env.command_queue = env->command_queue;
+
+ gpu_env.isUserCreated = 1;
+
+ return 0;
+}
+
+/**
+ * hb_create_kernel
+ * @param kernelname -
+ * @param env -
+ */
+int hb_create_kernel( char * kernelname, KernelEnv * env )
+{
+ int status;
+ env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status );
+ env->context = gpu_env.context;
+ env->command_queue = gpu_env.command_queue;
+ return status != CL_SUCCESS ? 1 : 0;
+}
+
+/**
+ * hb_release_kernel
+ * @param env -
+ */
+int hb_release_kernel( KernelEnv * env )
+{
+ int status = clReleaseKernel( env->kernel );
+ return status != CL_SUCCESS ? 1 : 0;
+}
+
+/**
+ * hb_init_opencl_env
+ * @param gpu_info -
+ */
+
+static int init_once = 0;
+int hb_init_opencl_env( GPUEnv *gpu_info )
+{
+ size_t length;
+ cl_int status;
+ cl_uint numPlatforms, numDevices;
+ cl_platform_id *platforms;
+ cl_context_properties cps[3];
+ char platformName[100];
+ unsigned int i;
+ void *handle = INVALID_HANDLE_VALUE;
+
+
+ if (init_once != 0)
+ return 0;
+ else
+ init_once = 1;
+ /*
+ * Have a look at the available platforms.
+ */
+ if( !gpu_info->isUserCreated )
+ {
+ status = clGetPlatformIDs( 0, NULL, &numPlatforms );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: OpenCL device platform not found." );
+ return(1);
+ }
+
+ gpu_info->platform = NULL;
+ if( 0 < numPlatforms )
+ {
+ platforms = (cl_platform_id*)malloc(
+ numPlatforms * sizeof(cl_platform_id));
+ if( platforms == (cl_platform_id*)NULL )
+ {
+ return(1);
+ }
+ status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Specific opencl platform not found." );
+ return(1);
+ }
+
+ for( i = 0; i < numPlatforms; i++ )
+ {
+ status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
+ sizeof(platformName), platformName,
+ NULL );
+
+ if( status != CL_SUCCESS )
+ {
+ continue;
+ }
+ gpu_info->platform = platforms[i];
+
+ if (!strcmp(platformName, "Advanced Micro Devices, Inc.") ||
+ !strcmp(platformName, "AMD"))
+ gpu_info->vendor = AMD;
+ else
+ gpu_info->vendor = others;
+
+ gpu_info->platform = platforms[i];
+
+ status = clGetDeviceIDs( gpu_info->platform /* platform */,
+ CL_DEVICE_TYPE_GPU /* device_type */,
+ 0 /* num_entries */,
+ NULL /* devices */,
+ &numDevices );
+
+ if( status != CL_SUCCESS )
+ {
+ continue;
+ }
+
+ if( numDevices )
+ break;
+
+ }
+ free( platforms );
+ }
+
+ if( NULL == gpu_info->platform )
+ {
+ hb_log( "OpenCL: No OpenCL-compatible GPU found." );
+ return(1);
+ }
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: No OpenCL-compatible GPU found." );
+ return(1);
+ }
+
+ /*
+ * Use available platform.
+ */
+ cps[0] = CL_CONTEXT_PLATFORM;
+ cps[1] = (cl_context_properties)gpu_info->platform;
+ cps[2] = 0;
+ /* Check for GPU. */
+ gpu_info->dType = CL_DEVICE_TYPE_GPU;
+ gpu_info->context = clCreateContextFromType(
+ cps, gpu_info->dType, NULL, NULL, &status );
+
+ if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
+ {
+ gpu_info->dType = CL_DEVICE_TYPE_CPU;
+ gpu_info->context = clCreateContextFromType(
+ cps, gpu_info->dType, NULL, NULL, &status );
+ }
+
+ if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
+ {
+ gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
+ gpu_info->context = clCreateContextFromType(
+ cps, gpu_info->dType, NULL, NULL, &status );
+ }
+
+ if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
+ {
+ hb_log( "OpenCL: Unable to create opencl context." );
+ return(1);
+ }
+
+ /* Detect OpenCL devices. */
+ /* First, get the size of device list data */
+ status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES,
+ 0, NULL, &length );
+ if((status != CL_SUCCESS) || (length == 0))
+ {
+ hb_log( "OpenCL: Unable to get the list of devices in context." );
+ return(1);
+ }
+
+ /* Now allocate memory for device list based on the size we got earlier */
+ gpu_info->devices = (cl_device_id*)malloc( length );
+ if( gpu_info->devices == (cl_device_id*)NULL )
+ {
+ return(1);
+ }
+
+ /* Now, get the device list data */
+ status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length,
+ gpu_info->devices, NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Unable to get the device list data in context." );
+ return(1);
+ }
+
+ /* Create OpenCL command queue. */
+ gpu_info->command_queue = clCreateCommandQueue( gpu_info->context,
+ gpu_info->devices[0],
+ 0, &status );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Unable to create opencl command queue." );
+ return(1);
+ }
+ }
+
+ if( clGetCommandQueueInfo( gpu_info->command_queue,
+ CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle),
+ &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE )
+ {
+#ifdef SYS_MINGW
+ SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );
+#endif
+ }
+
+ return 0;
+}
+
+
+/**
+ * hb_release_opencl_env
+ * @param gpu_info -
+ */
+int hb_release_opencl_env( GPUEnv *gpu_info )
+{
+ if( !isInited )
+ return 1;
+ int i;
+
+ for( i = 0; i<gpu_env.file_count; i++ )
+ {
+ if( gpu_env.programs[i] ) ;
+ {
+ clReleaseProgram( gpu_env.programs[i] );
+ gpu_env.programs[i] = NULL;
+ }
+ }
+
+ if( gpu_env.command_queue )
+ {
+ clReleaseCommandQueue( gpu_env.command_queue );
+ gpu_env.command_queue = NULL;
+ }
+
+ if( gpu_env.context )
+ {
+ clReleaseContext( gpu_env.context );
+ gpu_env.context = NULL;
+ }
+
+ isInited = 0;
+ gpu_info->isUserCreated = 0;
+ return 1;
+}
+
+
+/**
+ * hb_register_kernel_wrapper
+ * @param kernel_name -
+ * @param function -
+ */
+int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )
+{
+ int i;
+ for( i = 0; i < gpu_env.kernel_count; i++ )
+ {
+ if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
+ {
+ gpu_env.kernel_functions[i] = function;
+ return(1);
+ }
+ }
+ return(0);
+}
+
+/**
+ * hb_cached_of_kerner_prg
+ * @param gpu_env -
+ * @param cl_file_name -
+ */
+int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )
+{
+ int i;
+ for( i = 0; i < gpu_env->file_count; i++ )
+ {
+ if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 )
+ {
+ if( gpu_env->programs[i] != NULL )
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
+/**
+ * hb_compile_kernel_file
+ * @param filename -
+ * @param gpu_info -
+ * @param indx -
+ * @param build_option -
+ */
+int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
+ int indx, const char *build_option )
+{
+ cl_int status;
+ size_t length;
+ char *source_str;
+ const char *source;
+ size_t source_size[1];
+ char *buildLog = NULL;
+ int b_error, binary_status, binaryExisted;
+ char * binary;
+ cl_uint numDevices;
+ cl_device_id *devices;
+ FILE * fd;
+ FILE * fd1;
+ int idx;
+
+ if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 )
+ return (1);
+
+ idx = gpu_info->file_count;
+
+#ifdef USE_EXTERNAL_KERNEL
+ status = hb_convert_to_string( filename, &source_str, gpu_info, idx );
+ if( status == 0 )
+ return(0);
+#else
+ int kernel_src_size = strlen(kernel_src_scale) + strlen(kernel_src_yadif_filter);
+
+// char *scale_src;
+// status = hb_convert_to_string("./scale_kernels.cl", &scale_src, gpu_info, idx);
+// if (status != 0)
+// kernel_src_size += strlen(scale_src);
+
+ source_str = (char*)malloc( kernel_src_size + 2 );
+ strcpy( source_str, kernel_src_scale );
+// strcat( source_str, scale_src ); //
+ strcat( source_str, kernel_src_yadif_filter );
+#endif
+
+ source = source_str;
+ source_size[0] = strlen( source );
+
+ if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1)
+ {
+ status = clGetContextInfo(gpu_info->context,
+ CL_CONTEXT_NUM_DEVICES,
+ sizeof(numDevices),
+ &numDevices,
+ NULL);
+ if (status != CL_SUCCESS)
+ {
+ hb_log("OpenCL: Unable to get the number of devices in context.");
+ return 0;
+ }
+
+ devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices);
+ if (devices == NULL)
+ return 0;
+
+ length = 0;
+ b_error = 0;
+ b_error |= fseek(fd, 0, SEEK_END) < 0;
+ b_error |= (length = ftell(fd)) <= 0;
+ b_error |= fseek(fd, 0, SEEK_SET) < 0;
+ if (b_error)
+ return 0;
+
+ binary = (char*)calloc(length + 2, sizeof(char));
+ if (binary == NULL)
+ return 0;
+
+ b_error |= fread(binary, 1, length, fd) != length;
+#if 0 // this doesn't work under OS X and/or with some non-AMD GPUs
+ if (binary[length-1] != '\n')
+ binary[length++] = '\n;
+#endif
+
+ if (b_error)
+ return 0;
+
+ /* grab the handles to all of the devices in the context. */
+ status = clGetContextInfo(gpu_info->context,
+ CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices,
+ NULL);
+
+ gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
+ numDevices,
+ devices,
+ &length,
+ (const unsigned char**)&binary,
+ &binary_status,
+ &status);
+
+ fclose(fd);
+ free(devices);
+ fd = NULL;
+ devices = NULL;
+ }
+ else
+ {
+ /* create a CL program using the kernel source */
+ gpu_info->programs[idx] = clCreateProgramWithSource(
+ gpu_info->context, 1, &source, source_size, &status );
+ }
+
+ if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
+ hb_log( "OpenCL: Unable to get list of devices in context." );
+ return(0);
+ }
+
+ /* create a cl program executable for all the devices specified */
+ if( !gpu_info->isUserCreated )
+ {
+ status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices,
+ build_option, NULL, NULL );
+ }
+ else
+ {
+ status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev),
+ build_option, NULL, NULL );
+ }
+
+ if( status != CL_SUCCESS )
+ {
+ if( !gpu_info->isUserCreated )
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx],
+ gpu_info->devices[0],
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ }
+ else
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx],
+ gpu_info->dev,
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ }
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Unable to get GPU build information." );
+ return(0);
+ }
+
+ buildLog = (char*)malloc( length );
+ if( buildLog == (char*)NULL )
+ {
+ return(0);
+ }
+
+ if( !gpu_info->isUserCreated )
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0],
+ CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+ }
+ else
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev,
+ CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+ }
+
+ fd1 = fopen( "kernel-build.log", "w+" );
+ if( fd1 != NULL ) {
+ fwrite( buildLog, sizeof(char), length, fd1 );
+ fclose( fd1 );
+ }
+
+ free( buildLog );
+ return(0);
+ }
+
+ strcpy( gpu_env.kernelSrcFile[idx], filename );
+
+ if (binaryExisted != 1)
+ {
+ //hb_generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
+ }
+
+ gpu_info->file_count += 1;
+
+ return(1);
+}
+
+
+/**
+ * hb_get_kernel_env_and_func
+ * @param kernel_name -
+ * @param env -
+ * @param function -
+ */
+int hb_get_kernel_env_and_func( const char *kernel_name,
+ KernelEnv *env,
+ cl_kernel_function *function )
+{
+ int i;
+ for( i = 0; i < gpu_env.kernel_count; i++ )
+ {
+ if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
+ {
+ env->context = gpu_env.context;
+ env->command_queue = gpu_env.command_queue;
+ env->program = gpu_env.programs[0];
+ env->kernel = gpu_env.kernels[i];
+ env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0;
+ *function = gpu_env.kernel_functions[i];
+ return(1);
+ }
+ }
+ return(0);
+}
+
+/**
+ * hb_get_kernel_env_and_func
+ * @param kernel_name -
+ * @param userdata -
+ */
+int hb_run_kernel( const char *kernel_name, void **userdata )
+{
+ KernelEnv env;
+ cl_kernel_function function;
+ int status;
+ memset( &env, 0, sizeof(KernelEnv));
+ status = hb_get_kernel_env_and_func( kernel_name, &env, &function );
+ strcpy( env.kernel_name, kernel_name );
+ if( status == 1 )
+ {
+ return(function( userdata, &env ));
+ }
+
+ return(0);
+}
+
+/**
+ * hb_init_opencl_run_env
+ * @param argc -
+ * @param argv -
+ * @param build_option -
+ */
+int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )
+{
+ int status = 0;
+ if( MAX_CLKERNEL_NUM <= 0 )
+ {
+ return 1;
+ }
+
+ if((argc > MAX_CLFILE_NUM) || (argc<0))
+ {
+ return 1;
+ }
+
+ if( !isInited )
+ {
+ hb_regist_opencl_kernel();
+
+ /*initialize devices, context, comand_queue*/
+ status = hb_init_opencl_env( &gpu_env );
+ if( status )
+ return(1);
+
+ /*initialize program, kernel_name, kernel_count*/
+ status = hb_compile_kernel_file("hb-opencl-kernels.cl",
+ &gpu_env, 0, build_option);
+
+ if( status == 0 || gpu_env.kernel_count == 0 )
+ {
+ return(1);
+
+ }
+
+ useBuffers = 1;
+ isInited = 1;
+ }
+
+ return(0);
+}
+
+/**
+ * hb_release_opencl_run_env
+ */
+int hb_release_opencl_run_env()
+{
+ return hb_release_opencl_env( &gpu_env );
+}
+
+/**
+ * hb_opencl_stats
+ */
+int hb_opencl_stats()
+{
+ return isInited;
+}
+
+/**
+ * hb_get_opencl_env
+ */
+int hb_get_opencl_env()
+{
+ int i = 0;
+ cl_int status;
+ cl_uint numDevices;
+ cl_device_id *devices;
+
+ /*initialize devices, context, comand_queue*/
+ status = hb_init_opencl_env( &gpu_env );
+ if( status )
+ return(1);
+ status = clGetContextInfo( gpu_env.context,
+ CL_CONTEXT_NUM_DEVICES,
+ sizeof(numDevices),
+ &numDevices,
+ NULL );
+ if( status != CL_SUCCESS )
+ return 0;
+
+ devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
+ if( devices == NULL )
+ return 0;
+
+ /* grab the handles to all of the devices in the context. */
+ status = clGetContextInfo( gpu_env.context,
+ CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices,
+ NULL );
+
+ for (i = 0; i < numDevices; i++)
+ {
+ if (devices[i] != NULL)
+ {
+ char deviceVendor[100], deviceName[1024], driverVersion[1024];
+ clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(deviceVendor),
+ deviceVendor, NULL);
+ clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(deviceName),
+ deviceName, NULL);
+ clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(driverVersion),
+ driverVersion, NULL);
+ hb_log("hb_get_opencl_env: GPU #%d, Device Vendor: %s", i + 1, deviceVendor);
+ hb_log("hb_get_opencl_env: GPU #%d, Device Name: %s", i + 1, deviceName);
+ hb_log("hb_get_opencl_env: GPU #%d, Driver Version: %s", i + 1, driverVersion);
+ }
+ }
+
+ if( devices != NULL )
+ {
+ free( devices );
+ devices = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * hb_create_buffer
+ * @param cl_inBuf -
+ * @param flags -
+ * @param size -
+ */
+int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )
+{
+ int status;
+ *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: clCreateBuffer error '%d'", status );
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/**
+ * hb_read_opencl_buffer
+ * @param cl_inBuf -
+ * @param outbuf -
+ * @param size -
+ */
+int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
+{
+ int status;
+
+ status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );
+ return 0;
+ }
+
+ return 1;
+}
+
+int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size)
+{
+ int status;
+ int flags = CL_MEM_ALLOC_HOST_PTR;
+ //cl_event event;
+ *mem = clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
+ *addr = clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL/*&event*/, &status);
+
+ //hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status);
+
+ return (status == CL_SUCCESS) ? 1 : 0;
+}
+
+int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr)
+{
+ cl_event event;
+ int status = clEnqueueUnmapMemObject(gpu_env.command_queue, mem, addr, 0, NULL, &event);
+ if (status == CL_SUCCESS)
+ clWaitForEvents(1, &event);
+ else
+ hb_log("hb_free_mapped_buffer: error %d", status);
+ return (status == CL_SUCCESS) ? 1 : 0;
+}
+
+void hb_opencl_init()
+{
+ hb_get_opencl_env();
+}
+
+int hb_use_buffers()
+{
+ return useBuffers;
+}
+
+int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)
+{
+ int status = clEnqueueCopyBuffer(gpu_env.command_queue,
+ src_buffer,
+ dst_buffer,
+ src_offset, dst_offset, cb,
+ 0, 0, 0);
+ if( status != CL_SUCCESS )
+ {
+ av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status );
+ return 0;
+ }
+ return 1;
+}
+
+int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)
+{
+
+ int chrH = -(-height >> 1);
+ unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));
+ if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height))
+ {
+ memcpy(Ybuf,temp,linesize0 * height);
+ memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH);
+ memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH);
+
+ }
+ av_free(temp);
+
+ return 1;
+}
+
+int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)
+{
+ int status;
+ void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0 + linesize1)*height + offset, 0, NULL, NULL, NULL );
+ uint8_t *temp = (uint8_t *)mapped;
+ temp += offset;
+ memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);
+ memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);
+ memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);
+ clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL );
+ return 1;
+}
+
+cl_command_queue hb_get_command_queue()
+{
+ return gpu_env.command_queue;
+}
+
+cl_context hb_get_context()
+{
+ return gpu_env.context;
+}
+#endif