/* openclwrapper.c
Copyright (c) 2003-2012 HandBrake Team
This file is part of the HandBrake source code
Homepage: <http://handbrake.fr/>.
It may be used under the terms of the GNU General Public License v2.
For full terms see the file COPYING or visit http://www.gnu.org/licenses/gpl-2.0.html
Authors: Peng Gao
Li Cao
*/
#ifdef USE_OPENCL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "openclwrapper.h"
#include "openclkernels.h"
//#define USE_EXTERNAL_KERNEL
#ifdef SYS_MINGW
#include <windows.h>
#endif
#if defined(__APPLE__)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#if defined(_MSC_VER)
#define strcasecmp strcmpi
#endif
#define MAX_KERNEL_STRING_LEN 64
#define MAX_CLFILE_NUM 50
#define MAX_CLKERNEL_NUM 200
#define MAX_CLFILE_PATH 255
#define MAX_KERNEL_NUM 50
#define MAX_KERNEL_NAME_LEN 64
#ifndef INVALID_HANDLE_VALUE
#define INVALID_HANDLE_VALUE NULL
#endif
//#define THREAD_PRIORITY_TIME_CRITICAL 15
/* Vendor classification stored in GPUEnv.vendor. */
enum VENDOR
{
    AMD    = 0,
    Intel  = 1,
    NVIDIA = 2,
    others = 3
};
/*
 * Process-wide OpenCL state used by every function in this file through the
 * single static instance gpu_env.
 */
typedef struct _GPUEnv
{
//share vb in all modules in hb library
cl_platform_id platform; // platform chosen by hb_init_opencl_env() or supplied by hb_init_opencl_attr()
cl_device_type dType; // device type the context was created with (GPU, then CPU, then DEFAULT)
cl_context context;
cl_device_id * devices; // device list malloc'ed by hb_init_opencl_env() when the wrapper creates the context
cl_device_id dev; // single device supplied through hb_init_opencl_attr()
cl_command_queue command_queue;
cl_kernel kernels[MAX_CLFILE_NUM]; // read by hb_get_kernel_env_and_func(); never written in the visible code
cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256
int file_count; // number of entries of programs/kernelSrcFile in use; incremented by hb_compile_kernel_file()
char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1]; // names registered through ADD_KERNEL_CFG
cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM]; // callbacks set by hb_register_kernel_wrapper(), indexed like kernel_names
int kernel_count; // number of entries of kernel_names in use
int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
enum VENDOR vendor; // AMD or others, derived from the platform vendor string in hb_init_opencl_env()
}GPUEnv;
/* One entry of the gKernels table, filled in by ADD_KERNEL_CFG. */
typedef struct
{
char kernelName[MAX_KERNEL_NAME_LEN+1]; // kernel name, NUL-terminated
char * kernelStr; // pointer stored verbatim by ADD_KERNEL_CFG (NULL for both kernels registered in this file)
}hb_kernel_node;
/* The single OpenCL environment shared by all functions in this file. */
static GPUEnv gpu_env;
/* Set to 1 by hb_init_opencl_run_env() once the environment and kernels are ready; reported by hb_opencl_stats(). */
static int isInited = 0;
/* Set to 1 by hb_init_opencl_run_env() on success; reported by hb_use_buffers(). */
static int useBuffers = 0;
/* Kernel name table populated by ADD_KERNEL_CFG. */
static hb_kernel_node gKernels[MAX_KERNEL_NUM];
/*
 * ADD_KERNEL_CFG( idx, s, p )
 * Store kernel name `s` and pointer `p` in gKernels[idx], mirror the name
 * into gpu_env.kernel_names[idx] and increment gpu_env.kernel_count.
 *
 * The name copies are bounded by the destination capacity and always
 * NUL-terminated, so an over-long name is truncated instead of overflowing.
 * `idx` and `s` are evaluated more than once: pass side-effect-free values.
 * The expansion is a plain brace block (not do/while) because existing call
 * sites invoke the macro without a trailing semicolon.
 */
#define ADD_KERNEL_CFG( idx, s, p ){\
    strncpy( gKernels[(idx)].kernelName, (s), MAX_KERNEL_NAME_LEN );\
    gKernels[(idx)].kernelName[MAX_KERNEL_NAME_LEN] = '\0';\
    gKernels[(idx)].kernelStr = (p);\
    strncpy( gpu_env.kernel_names[(idx)], (s), MAX_KERNEL_STRING_LEN );\
    gpu_env.kernel_names[(idx)][MAX_KERNEL_STRING_LEN] = '\0';\
    gpu_env.kernel_count++; }
/**
 * hb_confirm_gpu_type
 * Probe the installed OpenCL platforms for a GPU device whose vendor string
 * is one of the vendors this wrapper recognises.
 *
 * Only the first platform that reports at least one GPU device is examined.
 *
 * @return 0 when a recognised GPU is found, -1 otherwise (including any
 *         OpenCL query or allocation failure)
 */
int hb_confirm_gpu_type()
{
    static const char * const supported_vendors[] =
    {
        "Advanced Micro Devices, Inc.",
        "Intel(R) Corporation",
#ifdef __APPLE__
        "AMD",    /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */
        "NVIDIA", /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */
#endif
        "NVIDIA Corporation"
    };
    const size_t vendor_count = sizeof(supported_vendors) / sizeof(supported_vendors[0]);
    cl_platform_id *platforms;
    cl_uint numPlatforms = 0;
    cl_uint i, j;
    size_t v;
    int found = 0;

    if( clGetPlatformIDs( 0, NULL, &numPlatforms ) != CL_SUCCESS || numPlatforms == 0 )
        return -1;

    platforms = (cl_platform_id*)malloc( numPlatforms * sizeof(cl_platform_id) );
    if( platforms == NULL )
        return -1;

    if( clGetPlatformIDs( numPlatforms, platforms, NULL ) != CL_SUCCESS )
    {
        free( platforms );
        return -1;
    }

    for( i = 0; i < numPlatforms; i++ )
    {
        char pbuff[100];
        cl_uint numDevices = 0;
        cl_device_id *devices;

        /* A platform that cannot even report its vendor is skipped. */
        if( clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
                               sizeof(pbuff), pbuff, NULL ) != CL_SUCCESS )
            continue;

        if( clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU,
                            0, NULL, &numDevices ) != CL_SUCCESS || numDevices == 0 )
            continue;

        devices = (cl_device_id*)malloc( numDevices * sizeof(cl_device_id) );
        if( devices == NULL )
            break;

        if( clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU,
                            numDevices, devices, NULL ) != CL_SUCCESS )
        {
            free( devices );
            continue;
        }

        for( j = 0; j < numDevices && !found; j++ )
        {
            char dbuff[100] = "";

            if( clGetDeviceInfo( devices[j], CL_DEVICE_VENDOR,
                                 sizeof(dbuff), dbuff, NULL ) != CL_SUCCESS )
                continue;

            for( v = 0; v < vendor_count; v++ )
            {
                if( !strcmp( dbuff, supported_vendors[v] ) )
                {
                    found = 1;
                    break;
                }
            }
        }

        free( devices );
        /* This platform had GPU devices: stop here whether or not one matched. */
        break;
    }

    free( platforms );
    return found ? 0 : -1;
}
/**
 * hb_regist_opencl_kernel
 * Reset the file and kernel counters of gpu_env and register the two
 * built-in kernel names ("frame_scale" and "yadif_filter") in slots 0 and 1.
 *
 * gpu_env is intentionally not zeroed wholesale here: it may hold pointers
 * to resources that are already allocated.
 *
 * @return always 0
 */
int hb_regist_opencl_kernel()
{
    gpu_env.kernel_count = 0UL;
    gpu_env.file_count   = 0; //argc;

    ADD_KERNEL_CFG( 0, "frame_scale", NULL );
    ADD_KERNEL_CFG( 1, "yadif_filter", NULL );

    return 0;
}
/**
 * hb_convert_to_string
 * Read a whole file into a newly allocated, NUL-terminated buffer.
 *
 * @param filename - path of the file to read
 * @param source   - receives the malloc'ed buffer on success; the caller
 *                   owns it and must free() it. Left untouched on failure.
 * @param gpu_info - unused
 * @param idx      - unused
 * @return 1 on success, 0 on any failure
 */
int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )
{
    FILE *file;
    long file_size;
    char *buffer = NULL;
    int ok = 0;

    (void)gpu_info;
    (void)idx;

    if( filename == NULL || source == NULL )
        return 0;

    /* Read-only access is enough; "rb+" would fail on read-only files. */
    file = fopen( filename, "rb" );
    if( file == NULL )
        return 0;

    if( fseek( file, 0, SEEK_END ) != 0 )
        goto done;
    file_size = ftell( file );
    if( file_size < 0 )
        goto done;
    rewind( file );

    buffer = (char*)malloc( (size_t)file_size + 1 );
    if( buffer == NULL )
        goto done;

    if( fread( buffer, 1, (size_t)file_size, file ) != (size_t)file_size )
        goto done;

    buffer[file_size] = '\0';
    *source = buffer;
    buffer = NULL; /* ownership handed to the caller */
    ok = 1;

done:
    free( buffer );
    fclose( file );
    return ok;
}
/**
 * hb_binary_generated
 * Look for a cached program binary named "./<stem> - <device name>.bin",
 * where <stem> is cl_file_name up to its first ".cl" (or the whole name when
 * it contains no ".cl").
 *
 * The devices of the context are tried in order and the search stops at the
 * first device for which the file can be opened, so at most one handle is
 * ever open.
 *
 * @param context      - OpenCL context whose devices are inspected
 * @param cl_file_name - kernel source file name used to derive the stem
 * @param fhandle      - receives the open (mode "rb") file on success; the
 *                       caller must fclose() it. Untouched otherwise.
 * @return 1 when a cached binary was opened, 0 otherwise
 */
int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )
{
    cl_uint i;
    cl_int status;
    cl_uint numDevices = 0;
    cl_device_id *devices;
    const char *ext;
    size_t stem_len;
    FILE *fd = NULL;

    status = clGetContextInfo( context,
                               CL_CONTEXT_NUM_DEVICES,
                               sizeof(numDevices),
                               &numDevices,
                               NULL );
    if( status != CL_SUCCESS )
    {
        hb_log( "OpenCL: Get context info failed" );
        return 0;
    }

    devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
    if( devices == NULL )
    {
        hb_log( "OpenCL: No device found" );
        return 0;
    }

    /* grab the handles to all of the devices in the context. */
    status = clGetContextInfo( context,
                               CL_CONTEXT_DEVICES,
                               sizeof(cl_device_id) * numDevices,
                               devices,
                               NULL );
    if( status != CL_SUCCESS )
    {
        hb_log( "OpenCL: Get context info failed" );
        free( devices );
        return 0;
    }

    ext = strstr( cl_file_name, ".cl" );
    stem_len = ( ext != NULL ) ? (size_t)( ext - cl_file_name ) : strlen( cl_file_name );

    for( i = 0; i < numDevices && fd == NULL; i++ )
    {
        char fileName[256];
        char deviceName[1024];
        int written;

        if( !devices[i] )
            continue;

        if( clGetDeviceInfo( devices[i], CL_DEVICE_NAME,
                             sizeof(deviceName), deviceName, NULL ) != CL_SUCCESS )
            continue;

        /* Clamp so the int precision below cannot overflow; the truncation
         * check then rejects the over-long name. */
        if( stem_len > sizeof(fileName) )
            stem_len = sizeof(fileName);

        written = snprintf( fileName, sizeof(fileName), "./%.*s - %s.bin",
                            (int)stem_len, cl_file_name, deviceName );
        if( written < 0 || (size_t)written >= sizeof(fileName) )
            continue; /* name does not fit: treat as "no cached binary" */

        fd = fopen( fileName, "rb" );
    }

    free( devices );

    if( fd == NULL )
        return 0;

    *fhandle = fd;
    return 1;
}
/**
 * hb_write_binary_to_file
 * Create (or truncate) a file and write a block of bytes to it.
 *
 * @param fileName - path of the file to write
 * @param birary   - bytes to write (may be NULL only when numBytes is 0)
 * @param numBytes - number of bytes to write
 * @return 1 when every byte was written and the file closed cleanly,
 *         0 otherwise
 */
int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )
{
    FILE *output;
    int ok = 1;

    if( fileName == NULL || ( birary == NULL && numBytes != 0 ) )
        return 0;

    output = fopen( fileName, "wb" );
    if( output == NULL )
        return 0;

    if( numBytes != 0 &&
        fwrite( birary, sizeof(char), numBytes, output ) != numBytes )
        ok = 0;

    /* fclose flushes buffered data, so its failure is a write failure too. */
    if( fclose( output ) != 0 )
        ok = 0;

    return ok;
}
/**
 * hb_generat_bin_from_kernel_source
 * Dump the device binaries of a built program, one file per device, named
 * "./<stem> - <device name>.bin" where <stem> is cl_file_name up to its first
 * ".cl" (or the whole name when it contains no ".cl").
 *
 * All temporary allocations are released on every exit path.
 *
 * @param program      - built OpenCL program to query
 * @param cl_file_name - kernel source file name used to derive the stem
 * @return 1 on success, 0 on any failure
 */
int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )
{
    cl_uint i;
    cl_int status;
    cl_uint numDevices = 0;
    size_t *binarySizes = NULL;
    cl_device_id *devices = NULL;
    char **binaries = NULL;
    const char *ext;
    size_t stem_len;
    int ok = 0;

    status = clGetProgramInfo( program,
                               CL_PROGRAM_NUM_DEVICES,
                               sizeof(numDevices),
                               &numDevices,
                               NULL );
    if( status != CL_SUCCESS )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
        return 0;
    }

    devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
    if( devices == NULL )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");
        goto done;
    }

    /* grab the handles to all of the devices in the program. */
    status = clGetProgramInfo( program,
                               CL_PROGRAM_DEVICES,
                               sizeof(cl_device_id) * numDevices,
                               devices,
                               NULL );
    if( status != CL_SUCCESS )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
        goto done;
    }

    /* figure out the sizes of each of the binaries. */
    binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
    if( binarySizes == NULL )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binarySizes failed");
        goto done;
    }
    status = clGetProgramInfo( program,
                               CL_PROGRAM_BINARY_SIZES,
                               sizeof(size_t) * numDevices,
                               binarySizes, NULL );
    if( status != CL_SUCCESS )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
        goto done;
    }

    /* copy over all of the generated binaries. calloc keeps unused slots
     * NULL so the cleanup below is safe after a partial allocation. */
    binaries = (char**)calloc( numDevices, sizeof(char *) );
    if( binaries == NULL )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");
        goto done;
    }
    for( i = 0; i < numDevices; i++ )
    {
        if( binarySizes[i] == 0 )
            continue;
        binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );
        if( binaries[i] == NULL )
        {
            hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", (int)i);
            goto done;
        }
    }

    status = clGetProgramInfo( program,
                               CL_PROGRAM_BINARIES,
                               sizeof(char *) * numDevices,
                               binaries,
                               NULL );
    if( status != CL_SUCCESS )
    {
        hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
        goto done;
    }

    ext = strstr( cl_file_name, ".cl" );
    stem_len = ( ext != NULL ) ? (size_t)( ext - cl_file_name ) : strlen( cl_file_name );

    /* dump out each binary into its own separate file. */
    for( i = 0; i < numDevices; i++ )
    {
        char fileName[256];
        char deviceName[1024];
        int written;

        if( binarySizes[i] == 0 )
            continue;

        status = clGetDeviceInfo( devices[i],
                                  CL_DEVICE_NAME,
                                  sizeof(deviceName),
                                  deviceName,
                                  NULL );
        if( status != CL_SUCCESS )
        {
            hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetDeviceInfo for CL_DEVICE_NAME failed");
            goto done;
        }

        /* Clamp so the int precision below cannot overflow; the truncation
         * check then rejects the over-long name. */
        if( stem_len > sizeof(fileName) )
            stem_len = sizeof(fileName);

        written = snprintf( fileName, sizeof(fileName), "./%.*s - %s.bin",
                            (int)stem_len, cl_file_name, deviceName );
        if( written < 0 || (size_t)written >= sizeof(fileName) )
        {
            hb_log("OpenCL: hb_generat_bin_from_kernel_source: kernel binary file name too long");
            goto done;
        }

        if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] ) )
        {
            hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel binary '%s'", fileName);
            goto done;
        }
    }
    ok = 1;

done:
    // Release all resouces and memory
    if( binaries != NULL )
    {
        for( i = 0; i < numDevices; i++ )
            free( binaries[i] );
        free( binaries );
    }
    free( binarySizes );
    free( devices );
    return ok;
}
/**
 * hb_init_opencl_attr
 * Adopt an OpenCL environment created by the caller: copy its context,
 * platform, device and command queue into gpu_env and mark gpu_env as
 * user-created. Does nothing when gpu_env is already marked user-created.
 *
 * @param env - caller-created environment to adopt
 * @return 0 when the environment was adopted, 1 when one was already set
 */
int hb_init_opencl_attr( OpenCLEnv * env )
{
    if( !gpu_env.isUserCreated )
    {
        gpu_env.isUserCreated = 1;
        gpu_env.command_queue = env->command_queue;
        gpu_env.dev           = env->devices;
        gpu_env.platform      = env->platform;
        gpu_env.context       = env->context;
        return 0;
    }
    return 1;
}
/**
 * hb_create_kernel
 * Create a kernel object from gpu_env.programs[0] and fill in the kernel,
 * context and command queue fields of env.
 *
 * @param kernelname - name of the kernel function inside the program
 * @param env        - kernel environment to fill in
 * @return 0 on success, 1 when clCreateKernel reports an error
 */
int hb_create_kernel( char * kernelname, KernelEnv * env )
{
    int err;

    env->kernel        = clCreateKernel( gpu_env.programs[0], kernelname, &err );
    env->command_queue = gpu_env.command_queue;
    env->context       = gpu_env.context;

    if( err != CL_SUCCESS )
        return 1;
    return 0;
}
/**
 * hb_release_kernel
 * Release the kernel object held in env.
 *
 * @param env - kernel environment whose kernel is released
 * @return 0 on success, 1 when clReleaseKernel reports an error
 */
int hb_release_kernel( KernelEnv * env )
{
    if( clReleaseKernel( env->kernel ) == CL_SUCCESS )
        return 0;
    return 1;
}
/**
 * hb_init_opencl_env
 * One-time creation of the OpenCL platform/context/device list/command queue
 * stored in gpu_info. When gpu_info->isUserCreated is set, the caller-supplied
 * objects are used as they are and nothing is created here.
 *
 * A successful call latches init_once, so later calls return 0 immediately.
 * A failed call releases whatever it created and clears init_once again, so
 * that the failure is reported to later callers instead of being masked.
 *
 * @param gpu_info - environment to initialise
 * @return 0 on success (or when already initialised), 1 on failure
 */
static int init_once = 0;
int hb_init_opencl_env( GPUEnv *gpu_info )
{
    size_t length = 0;
    cl_int status = CL_SUCCESS;
    cl_uint numPlatforms = 0, numDevices = 0;
    cl_platform_id *platforms = NULL;
    cl_context_properties cps[3];
    char platformName[100];
    unsigned int i;
    void *handle = INVALID_HANDLE_VALUE;
    int own_context = 0; /* context was created by this call */
    int own_devices = 0; /* device list was allocated by this call */

    if( init_once != 0 )
        return 0;
    init_once = 1;

    /*
     * Have a look at the available platforms.
     */
    if( !gpu_info->isUserCreated )
    {
        status = clGetPlatformIDs( 0, NULL, &numPlatforms );
        if( status != CL_SUCCESS )
        {
            hb_log( "OpenCL: OpenCL device platform not found." );
            goto fail;
        }
        gpu_info->platform = NULL;
        if( 0 < numPlatforms )
        {
            platforms = (cl_platform_id*)malloc(
                numPlatforms * sizeof(cl_platform_id) );
            if( platforms == NULL )
                goto fail;

            status = clGetPlatformIDs( numPlatforms, platforms, NULL );
            if( status != CL_SUCCESS )
            {
                hb_log( "OpenCL: Specific opencl platform not found." );
                goto fail;
            }
            for( i = 0; i < numPlatforms; i++ )
            {
                status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
                                            sizeof(platformName), platformName,
                                            NULL );
                if( status != CL_SUCCESS )
                    continue;

                gpu_info->platform = platforms[i];
                if( !strcmp( platformName, "Advanced Micro Devices, Inc." ) ||
                    !strcmp( platformName, "AMD" ) )
                    gpu_info->vendor = AMD;
                else
                    gpu_info->vendor = others;

                status = clGetDeviceIDs( gpu_info->platform /* platform */,
                                         CL_DEVICE_TYPE_GPU /* device_type */,
                                         0 /* num_entries */,
                                         NULL /* devices */,
                                         &numDevices );
                if( status != CL_SUCCESS )
                    continue;

                if( numDevices )
                    break;
            }
            free( platforms );
            platforms = NULL;
        }
        /* status still holds the result of the last platform probe. */
        if( NULL == gpu_info->platform || status != CL_SUCCESS )
        {
            hb_log( "OpenCL: No OpenCL-compatible GPU found." );
            goto fail;
        }

        /*
         * Use available platform.
         */
        cps[0] = CL_CONTEXT_PLATFORM;
        cps[1] = (cl_context_properties)gpu_info->platform;
        cps[2] = 0;

        /* Check for GPU, then fall back to CPU, then to the default device. */
        gpu_info->dType = CL_DEVICE_TYPE_GPU;
        gpu_info->context = clCreateContextFromType(
            cps, gpu_info->dType, NULL, NULL, &status );
        if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
        {
            gpu_info->dType = CL_DEVICE_TYPE_CPU;
            gpu_info->context = clCreateContextFromType(
                cps, gpu_info->dType, NULL, NULL, &status );
        }
        if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
        {
            gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
            gpu_info->context = clCreateContextFromType(
                cps, gpu_info->dType, NULL, NULL, &status );
        }
        if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
        {
            hb_log( "OpenCL: Unable to create opencl context." );
            gpu_info->context = NULL;
            goto fail;
        }
        own_context = 1;

        /* Detect OpenCL devices. */
        /* First, get the size of device list data */
        status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES,
                                   0, NULL, &length );
        if( (status != CL_SUCCESS) || (length == 0) )
        {
            hb_log( "OpenCL: Unable to get the list of devices in context." );
            goto fail;
        }

        /* Now allocate memory for device list based on the size we got earlier */
        gpu_info->devices = (cl_device_id*)malloc( length );
        if( gpu_info->devices == NULL )
            goto fail;
        own_devices = 1;

        /* Now, get the device list data */
        status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length,
                                   gpu_info->devices, NULL );
        if( status != CL_SUCCESS )
        {
            hb_log( "OpenCL: Unable to get the device list data in context." );
            goto fail;
        }

        /* Create OpenCL command queue. */
        gpu_info->command_queue = clCreateCommandQueue( gpu_info->context,
                                                        gpu_info->devices[0],
                                                        0, &status );
        if( status != CL_SUCCESS )
        {
            hb_log( "OpenCL: Unable to create opencl command queue." );
            gpu_info->command_queue = NULL;
            goto fail;
        }
    }

    /* Query the queue's thread handle; only the MinGW build acts on it. */
    if( clGetCommandQueueInfo( gpu_info->command_queue,
                               CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle),
                               &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE )
    {
#ifdef SYS_MINGW
        SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );
#endif
    }
    return 0;

fail:
    free( platforms );
    if( own_devices )
    {
        free( gpu_info->devices );
        gpu_info->devices = NULL;
    }
    if( own_context )
    {
        clReleaseContext( gpu_info->context );
        gpu_info->context = NULL;
    }
    init_once = 0;
    return 1;
}
/**
 * hb_release_opencl_env
 * Release the program objects, command queue and context held in gpu_env and
 * mark the wrapper as uninitialised.
 *
 * NOTE(review): the middle of this function was unreadable in the reviewed
 * source (the text between the loop header and the final assignment was
 * lost). The release steps below were reconstructed from the resources this
 * file creates - confirm against the project history before merging.
 *
 * @param gpu_info - environment whose isUserCreated flag is cleared
 * @return always 1
 */
int hb_release_opencl_env( GPUEnv *gpu_info )
{
    int i;

    if( !isInited )
        return 1;

    for( i = 0; i < gpu_env.file_count; i++ )
    {
        if( gpu_env.programs[i] )
        {
            clReleaseProgram( gpu_env.programs[i] );
            gpu_env.programs[i] = NULL;
        }
    }

    if( gpu_env.command_queue )
    {
        clReleaseCommandQueue( gpu_env.command_queue );
        gpu_env.command_queue = NULL;
    }

    if( gpu_env.context )
    {
        clReleaseContext( gpu_env.context );
        gpu_env.context = NULL;
    }

    isInited = 0;
    gpu_info->isUserCreated = 0;
    return 1;
}
/**
 * hb_register_kernel_wrapper
 * Attach a callback to the first registered kernel whose name matches
 * kernel_name (case-insensitive comparison).
 *
 * @param kernel_name - name of a kernel previously registered in gpu_env
 * @param function    - callback to store for that kernel
 * @return 1 when the kernel was found and the callback stored, 0 otherwise
 */
int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )
{
    int slot = 0;

    while( slot < gpu_env.kernel_count )
    {
        if( !strcasecmp( kernel_name, gpu_env.kernel_names[slot] ) )
        {
            gpu_env.kernel_functions[slot] = function;
            return 1;
        }
        slot++;
    }
    return 0;
}
/**
 * hb_cached_of_kerner_prg
 * Tell whether a program object already exists for the given kernel file
 * (file names are compared case-insensitively).
 *
 * @param gpu_env      - environment whose program table is searched
 * @param cl_file_name - kernel source file name to look for
 * @return 1 when a matching entry with a non-NULL program exists, 0 otherwise
 */
int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )
{
    int n;

    for( n = 0; n < gpu_env->file_count; n++ )
    {
        if( strcasecmp( gpu_env->kernelSrcFile[n], cl_file_name ) != 0 )
            continue;
        if( gpu_env->programs[n] == NULL )
            continue;
        return 1;
    }
    return 0;
}
/**
 * hb_compile_kernel_file
 * Create and build the OpenCL program for a kernel file and record it in the
 * next free slot of gpu_info. A cached device binary is used when
 * hb_binary_generated() finds one; otherwise the program is created from
 * source (the built-in kernel strings, or the file itself when
 * USE_EXTERNAL_KERNEL is defined).
 *
 * The program is created and built for exactly one device: gpu_info->dev for
 * a user-created environment, gpu_info->devices[0] otherwise. On a build
 * failure the build log is written to "kernel-build.log" and the program
 * object is released. All temporary buffers and the cached-binary file handle
 * are released on every exit path.
 *
 * @param filename     - kernel file name (also the cache lookup key)
 * @param gpu_info     - environment that receives the program
 * @param indx         - unused
 * @param build_option - option string passed to clBuildProgram
 * @return 1 on success or when the file is already compiled, 0 on failure
 */
int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
                            int indx, const char *build_option )
{
    cl_int status = CL_SUCCESS;
    cl_int binary_status = CL_SUCCESS;
    size_t length = 0;
    char *source_str = NULL;
    const char *source;
    size_t source_size[1];
    char *buildLog = NULL;
    char *binary = NULL;
    cl_device_id *build_device;
    FILE *fd = NULL;
    FILE *fd1;
    int binaryExisted;
    int idx;
    int result = 0;

    (void)indx;

    if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 )
        return 1;

    idx = gpu_info->file_count;
    if( idx < 0 || idx >= MAX_CLFILE_NUM ||
        strlen( filename ) >= sizeof(gpu_info->kernelSrcFile[0]) )
    {
        hb_log( "OpenCL: No free program slot or kernel file name too long." );
        return 0;
    }

    build_device = gpu_info->isUserCreated ? &(gpu_info->dev) : gpu_info->devices;
    if( build_device == NULL )
    {
        hb_log( "OpenCL: No device available to build the program for." );
        return 0;
    }

#ifdef USE_EXTERNAL_KERNEL
    if( hb_convert_to_string( filename, &source_str, gpu_info, idx ) == 0 )
        return 0;
#else
    source_str = (char*)malloc( strlen( kernel_src_scale ) +
                                strlen( kernel_src_yadif_filter ) + 1 );
    if( source_str == NULL )
        return 0;
    strcpy( source_str, kernel_src_scale );
    strcat( source_str, kernel_src_yadif_filter );
#endif
    source = source_str;
    source_size[0] = strlen( source );

    binaryExisted = hb_binary_generated( gpu_info->context, filename, &fd );
    if( binaryExisted == 1 )
    {
        long file_len;

        if( fseek( fd, 0, SEEK_END ) != 0 ||
            ( file_len = ftell( fd ) ) <= 0 ||
            fseek( fd, 0, SEEK_SET ) != 0 )
            goto done;

        length = (size_t)file_len;
        binary = (char*)calloc( length + 1, sizeof(char) );
        if( binary == NULL )
            goto done;
        if( fread( binary, 1, length, fd ) != length )
            goto done;

        /* One binary, one length, one status slot: so exactly one device. */
        gpu_info->programs[idx] = clCreateProgramWithBinary( gpu_info->context,
                                                             1,
                                                             build_device,
                                                             &length,
                                                             (const unsigned char**)&binary,
                                                             &binary_status,
                                                             &status );
    }
    else
    {
        /* create a CL program using the kernel source */
        gpu_info->programs[idx] = clCreateProgramWithSource(
            gpu_info->context, 1, &source, source_size, &status );
    }
    if( (gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS) )
    {
        hb_log( "OpenCL: Unable to create the program object." );
        goto fail_program;
    }

    /* create a cl program executable for the selected device */
    status = clBuildProgram( gpu_info->programs[idx], 1, build_device,
                             build_option, NULL, NULL );
    if( status != CL_SUCCESS )
    {
        status = clGetProgramBuildInfo( gpu_info->programs[idx], *build_device,
                                        CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
        if( status != CL_SUCCESS )
        {
            hb_log( "OpenCL: Unable to get GPU build information." );
            goto fail_program;
        }
        buildLog = (char*)malloc( length );
        if( buildLog == NULL )
            goto fail_program;

        status = clGetProgramBuildInfo( gpu_info->programs[idx], *build_device,
                                        CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
        if( status == CL_SUCCESS )
        {
            fd1 = fopen( "kernel-build.log", "w+" );
            if( fd1 != NULL )
            {
                fwrite( buildLog, sizeof(char), length, fd1 );
                fclose( fd1 );
            }
        }
        goto fail_program;
    }

    /* Length was validated above, so this copy cannot overflow the slot. */
    strcpy( gpu_info->kernelSrcFile[idx], filename );
    if( binaryExisted != 1 )
    {
        //hb_generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
    }
    gpu_info->file_count += 1;
    result = 1;
    goto done;

fail_program:
    if( gpu_info->programs[idx] != NULL )
    {
        clReleaseProgram( gpu_info->programs[idx] );
        gpu_info->programs[idx] = NULL;
    }
done:
    if( fd != NULL )
        fclose( fd );
    free( buildLog );
    free( binary );
    free( source_str );
    return result;
}
/**
 * hb_get_kernel_env_and_func
 * Look up a registered kernel by name (case-insensitive) and, for the first
 * match, fill in env from gpu_env and return the registered callback.
 *
 * @param kernel_name - kernel name to look up
 * @param env         - receives context, command queue, program
 *                      (gpu_env.programs[0]), kernel and the isAMD flag
 * @param function    - receives the callback stored for the kernel
 * @return 1 when the kernel was found, 0 otherwise (outputs untouched)
 */
int hb_get_kernel_env_and_func( const char *kernel_name,
                                KernelEnv *env,
                                cl_kernel_function *function )
{
    int k;
    int match = -1;

    for( k = 0; k < gpu_env.kernel_count && match < 0; k++ )
    {
        if( strcasecmp( kernel_name, gpu_env.kernel_names[k] ) == 0 )
            match = k;
    }
    if( match < 0 )
        return 0;

    env->context       = gpu_env.context;
    env->command_queue = gpu_env.command_queue;
    env->program       = gpu_env.programs[0];
    env->kernel        = gpu_env.kernels[match];
    if( gpu_env.vendor == AMD )
        env->isAMD = 1;
    else
        env->isAMD = 0;
    *function = gpu_env.kernel_functions[match];
    return 1;
}
/**
 * hb_run_kernel
 * Look up a registered kernel by name and invoke its callback with a freshly
 * prepared kernel environment.
 *
 * @param kernel_name - name of the kernel to run
 * @param userdata    - opaque argument array forwarded to the callback
 * @return the callback's return value, or 0 when the kernel is unknown or no
 *         callback has been registered for it
 */
int hb_run_kernel( const char *kernel_name, void **userdata )
{
    KernelEnv env;
    cl_kernel_function function = NULL;

    if( kernel_name == NULL )
        return 0;

    memset( &env, 0, sizeof(KernelEnv) );
    if( hb_get_kernel_env_and_func( kernel_name, &env, &function ) != 1 )
        return 0;

    /* A kernel that was registered by name only has no callback yet. */
    if( function == NULL )
    {
        hb_log( "OpenCL: no function registered for kernel '%s'", kernel_name );
        return 0;
    }

    /* Bounded copy; env was zeroed above, so the result stays terminated.
     * assumes KernelEnv.kernel_name is a char array - TODO confirm in openclwrapper.h */
    strncpy( env.kernel_name, kernel_name, sizeof(env.kernel_name) - 1 );

    return function( userdata, &env );
}
/**
 * hb_init_opencl_run_env
 * Bring up the whole OpenCL runtime once: register the kernel names, create
 * the environment and compile "hb-opencl-kernels.cl". Later calls are no-ops
 * once initialisation has succeeded.
 *
 * @param argc         - must lie in [0, MAX_CLFILE_NUM]
 * @param argv         - unused
 * @param build_option - option string passed on to the kernel build
 * @return 0 on success (or when already initialised), 1 on failure
 */
int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )
{
    int rc;

    if( MAX_CLKERNEL_NUM <= 0 || argc < 0 || argc > MAX_CLFILE_NUM )
        return 1;

    if( isInited )
        return 0;

    hb_regist_opencl_kernel();

    /*initialize devices, context, comand_queue*/
    rc = hb_init_opencl_env( &gpu_env );
    if( rc != 0 )
        return 1;

    /*initialize program, kernel_name, kernel_count*/
    rc = hb_compile_kernel_file( "hb-opencl-kernels.cl",
                                 &gpu_env, 0, build_option );
    if( rc == 0 || gpu_env.kernel_count == 0 )
        return 1;

    isInited   = 1;
    useBuffers = 1;
    return 0;
}
/**
 * hb_release_opencl_run_env
 * Release the shared OpenCL environment.
 *
 * @return the result of hb_release_opencl_env() for gpu_env
 */
int hb_release_opencl_run_env()
{
    int released = hb_release_opencl_env( &gpu_env );

    return released;
}
/**
 * hb_opencl_stats
 * Report the current value of the isInited flag.
 *
 * @return the value of isInited
 */
int hb_opencl_stats()
{
    const int initialised = isInited;

    return initialised;
}
/**
 * hb_get_opencl_env
 * Initialise the OpenCL environment and log vendor, name and driver version
 * of every device in the context.
 *
 * @return 0 on success; non-zero on failure (1 for initialisation, query or
 *         allocation failures, or the OpenCL error code of the device-list
 *         query)
 */
int hb_get_opencl_env()
{
    cl_uint i;
    cl_int status;
    cl_uint numDevices = 0;
    cl_device_id *devices;

    /*initialize devices, context, comand_queue*/
    if( hb_init_opencl_env( &gpu_env ) )
        return 1;

    status = clGetContextInfo( gpu_env.context,
                               CL_CONTEXT_NUM_DEVICES,
                               sizeof(numDevices),
                               &numDevices,
                               NULL );
    if( status != CL_SUCCESS )
        return 1;
    if( numDevices == 0 )
        return 0; /* nothing to report */

    devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
    if( devices == NULL )
        return 1;

    /* grab the handles to all of the devices in the context. */
    status = clGetContextInfo( gpu_env.context,
                               CL_CONTEXT_DEVICES,
                               sizeof(cl_device_id) * numDevices,
                               devices,
                               NULL );
    if( status == CL_SUCCESS )
    {
        for( i = 0; i < numDevices; i++ )
        {
            /* Zero-initialised so a failed query logs an empty string. */
            char deviceVendor[100] = "";
            char deviceName[1024] = "";
            char driverVersion[1024] = "";

            if( devices[i] == NULL )
                continue;

            clGetDeviceInfo( devices[i], CL_DEVICE_VENDOR, sizeof(deviceVendor),
                             deviceVendor, NULL );
            clGetDeviceInfo( devices[i], CL_DEVICE_NAME, sizeof(deviceName),
                             deviceName, NULL );
            clGetDeviceInfo( devices[i], CL_DRIVER_VERSION, sizeof(driverVersion),
                             driverVersion, NULL );
            hb_log("hb_get_opencl_env: GPU #%d, Device Vendor: %s", (int)i + 1, deviceVendor);
            hb_log("hb_get_opencl_env: GPU #%d, Device Name: %s", (int)i + 1, deviceName);
            hb_log("hb_get_opencl_env: GPU #%d, Driver Version: %s", (int)i + 1, driverVersion);
        }
    }

    free( devices );
    return status;
}
/**
 * hb_create_buffer
 * Create an OpenCL buffer in the shared context, without a host pointer.
 *
 * @param cl_Buf - receives the buffer object returned by clCreateBuffer
 * @param flags  - memory flags passed to clCreateBuffer
 * @param size   - buffer size in bytes
 * @return 1 on success, 0 on failure (the error code is logged)
 */
int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )
{
    int err;
    cl_mem buffer = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &err );

    *cl_Buf = buffer;
    if( err == CL_SUCCESS )
        return 1;

    hb_log( "OpenCL: clCreateBuffer error '%d'", err );
    return 0;
}
/**
 * hb_read_opencl_buffer
 * Blocking read of the first `size` bytes of an OpenCL buffer into host
 * memory, using the shared command queue.
 *
 * @param cl_inBuf - buffer object to read from
 * @param outbuf   - host destination
 * @param size     - number of bytes to read
 * @return 1 on success, 0 on failure (the error code is logged)
 */
int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
{
    int err = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,
                                   0, size, outbuf, 0, 0, 0 );

    if( err == CL_SUCCESS )
        return 1;

    hb_log( "OpenCL: av_read_opencl_buffer error '%d'", err );
    return 0;
}
/**
 * hb_cl_create_mapped_buffer
 * Create a CL_MEM_ALLOC_HOST_PTR buffer in the shared context and map it for
 * reading and writing (blocking map on the shared command queue).
 *
 * On failure nothing is left allocated and both outputs are set to NULL.
 *
 * @param mem  - receives the buffer object
 * @param addr - receives the mapped host address
 * @param size - buffer size in bytes
 * @return 1 on success, 0 on failure
 */
int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size)
{
    cl_int status;
    int flags = CL_MEM_ALLOC_HOST_PTR;

    *addr = NULL;
    *mem = clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
    if (status != CL_SUCCESS || *mem == NULL)
    {
        hb_log("hb_cl_create_mapped_buffer: clCreateBuffer error %d", (int)status);
        *mem = NULL;
        return 0;
    }

    *addr = (unsigned char *)clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE,
                                                CL_MAP_READ | CL_MAP_WRITE, 0, size,
                                                0, NULL, NULL, &status);
    if (status != CL_SUCCESS || *addr == NULL)
    {
        hb_log("hb_cl_create_mapped_buffer: clEnqueueMapBuffer error %d", (int)status);
        /* Do not leak the buffer that could not be mapped. */
        clReleaseMemObject(*mem);
        *mem = NULL;
        *addr = NULL;
        return 0;
    }
    return 1;
}
/**
 * hb_cl_free_mapped_buffer
 * Unmap a previously mapped buffer region and wait for the unmap to finish.
 * The buffer object itself is not released here.
 *
 * @param mem  - buffer object that was mapped
 * @param addr - host address returned by the mapping call
 * @return 1 on success, 0 on failure (the error code is logged)
 */
int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr)
{
    cl_event event;
    int status = clEnqueueUnmapMemObject(gpu_env.command_queue, mem, addr, 0, NULL, &event);

    if (status == CL_SUCCESS)
    {
        clWaitForEvents(1, &event);
        /* The event returned by the enqueue call must be released by us. */
        clReleaseEvent(event);
    }
    else
    {
        hb_log("hb_free_mapped_buffer: error %d", status);
    }
    return (status == CL_SUCCESS) ? 1 : 0;
}
/**
 * hb_opencl_init
 * Run hb_get_opencl_env() and discard its result.
 */
void hb_opencl_init()
{
    (void)hb_get_opencl_env();
}
/**
 * hb_use_buffers
 * Report the current value of the useBuffers flag.
 *
 * @return the value of useBuffers
 */
int hb_use_buffers()
{
    const int enabled = useBuffers;

    return enabled;
}
/**
 * hb_copy_buffer
 * Enqueue a device-side copy between two OpenCL buffers on the shared
 * command queue.
 *
 * @param src_buffer - source buffer object
 * @param dst_buffer - destination buffer object
 * @param src_offset - byte offset into the source
 * @param dst_offset - byte offset into the destination
 * @param cb         - number of bytes to copy
 * @return 1 when the copy was enqueued, 0 on failure (the error code is
 *         logged)
 */
int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)
{
    int status = clEnqueueCopyBuffer(gpu_env.command_queue,
                                     src_buffer,
                                     dst_buffer,
                                     src_offset, dst_offset, cb,
                                     0, 0, 0);
    if( status != CL_SUCCESS )
    {
        /* Logged through hb_log like every other failure in this file, and
         * under this function's own name. */
        hb_log( "OpenCL: hb_copy_buffer error '%d'", status );
        return 0;
    }
    return 1;
}
/**
 * hb_read_opencl_frame_buffer
 * Read a frame from an OpenCL buffer into a temporary host buffer and split
 * it into three separate plane buffers.
 *
 * The first plane is taken as linesize0 * height bytes; the second and third
 * planes use chrH = ceil(height / 2) rows each.
 *
 * @param cl_inBuf  - buffer object holding the frame
 * @param Ybuf      - destination of the first plane
 * @param Ubuf      - destination of the second plane
 * @param Vbuf      - destination of the third plane
 * @param linesize0 - bytes per row of the first plane
 * @param linesize1 - bytes per row of the second plane
 * @param linesize2 - bytes per row copied for the third plane
 * @param height    - number of rows of the first plane
 * @return 1 when the frame was read and copied, 0 on allocation or read
 *         failure (the destination planes are then left untouched)
 */
int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)
{
    int chrH = -(-height >> 1); /* height / 2, rounded up */
    int ok = 0;
    unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));

    if (temp == NULL)
    {
        hb_log("OpenCL: hb_read_opencl_frame_buffer: unable to allocate the staging buffer");
        return 0;
    }

    if (hb_read_opencl_buffer(cl_inBuf, temp, sizeof(uint8_t) * (linesize0 + linesize1) * height))
    {
        memcpy(Ybuf, temp, linesize0 * height);
        memcpy(Ubuf, temp + linesize0 * height, linesize1 * chrH);
        memcpy(Vbuf, temp + linesize0 * height + linesize1 * chrH, linesize2 * chrH);
        ok = 1;
    }

    av_free(temp);
    return ok;
}
/**
 * hb_write_opencl_frame_buffer
 * Map an OpenCL buffer for writing and copy three host plane buffers into it
 * back to back, starting `offset` bytes into the buffer, then unmap it.
 *
 * The second and third planes are copied as linesize * height / 2 bytes each.
 *
 * @param cl_inBuf  - destination buffer object
 * @param Ybuf      - source of the first plane
 * @param Ubuf      - source of the second plane
 * @param Vbuf      - source of the third plane
 * @param linesize0 - bytes per row of the first plane
 * @param linesize1 - bytes per row of the second plane
 * @param linesize2 - bytes per row of the third plane
 * @param height    - number of rows of the first plane
 * @param offset    - byte offset of the frame inside the buffer
 * @return 1 on success, 0 when the buffer could not be mapped or unmapped
 */
int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)
{
    cl_int status = CL_SUCCESS;
    void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, CL_MAP_WRITE, 0,
                                       sizeof(uint8_t) * (linesize0 + linesize1)*height + offset,
                                       0, NULL, NULL, &status );
    uint8_t *temp;

    /* Never write through a failed mapping. */
    if( mapped == NULL || status != CL_SUCCESS )
    {
        hb_log( "OpenCL: hb_write_opencl_frame_buffer: clEnqueueMapBuffer error '%d'", (int)status );
        return 0;
    }

    temp = (uint8_t *)mapped;
    temp += offset;
    memcpy(temp, Ybuf, sizeof(uint8_t) * linesize0 * height);
    memcpy(temp + sizeof(uint8_t) * linesize0 * height, Ubuf, sizeof(uint8_t) * linesize1 * height/2);
    memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2), Vbuf, sizeof(uint8_t) * linesize2 * height/2);

    status = clEnqueueUnmapMemObject( gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL );
    if( status != CL_SUCCESS )
    {
        hb_log( "OpenCL: hb_write_opencl_frame_buffer: clEnqueueUnmapMemObject error '%d'", (int)status );
        return 0;
    }
    return 1;
}
/**
 * hb_get_command_queue
 * @return the command queue stored in the shared OpenCL environment
 */
cl_command_queue hb_get_command_queue()
{
    cl_command_queue queue = gpu_env.command_queue;

    return queue;
}
/**
 * hb_get_context
 * @return the context stored in the shared OpenCL environment
 */
cl_context hb_get_context()
{
    cl_context ctx = gpu_env.context;

    return ctx;
}
#endif