/* scale_kernel.h
Copyright (c) 2003-2012 HandBrake Team
This file is part of the HandBrake source code
Homepage: .
It may be used under the terms of the GNU General Public License v2.
For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
Authors: Peng Gao
Li Cao
*/
#ifdef USE_OPENCL
#include
#include
#include
#include
#include
#include "scale.h"
#include "openclwrapper.h"
#define OCLCHECK( method, ...) \
status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \
hb_log(" error %s %d",# method, status); assert(0); return status; }
#define CREATEBUF( out, flags, size, ptr)\
out = clCreateBuffer( kenv->context, (flags), (size), ptr, &status );\
if( status != CL_SUCCESS ) { hb_log( "clCreateBuffer faild %d", status ); return -1; }
#define CL_PARAM_NUM 20
/****************************************************************************************************************************/
/*************************Combine the hscale and yuv2plane into scaling******************************************************/
/****************************************************************************************************************************/
static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv )
{
cl_int status;
if(!c->hyscale_fast || !c->hcscale_fast)
{
CREATEBUF(c->cl_hLumFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short), c->hLumFilter);
CREATEBUF(c->cl_hLumFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int), c->hLumFilterPos);
CREATEBUF(c->cl_hChrFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short), c->hChrFilter);
CREATEBUF(c->cl_hChrFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int), c->hChrFilterPos);
}
if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
{
CREATEBUF(c->cl_vLumFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstH*c->vLumFilterSize*sizeof(cl_short), c->vLumFilter);
CREATEBUF(c->cl_vChrFilter, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstH*c->vChrFilterSize*sizeof(cl_short), c->vChrFilter);
}
CREATEBUF(c->cl_vLumFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstH*sizeof(cl_int), c->vLumFilterPos);
CREATEBUF(c->cl_vChrFilterPos, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstH*sizeof(cl_int), c->vChrFilterPos);
return 1;
}
int av_scale_frame_func( void **userdata, KernelEnv *kenv )
{
ScaleContext *c = (ScaleContext *)userdata[0];
c->cl_src = (cl_mem)userdata[2];
c->cl_dst = (cl_mem)userdata[1];
/*frame size*/
int *tmp = (int *)userdata[3];
int srcStride = tmp[0];
int srcChrStride = tmp[1];
int srcW = c->srcW;
int srcH = c->srcH;
tmp = (int *)userdata[4];
int dstStride = tmp[0];
int dstChrStride = tmp[1];
int dstW = c->dstW;
int dstH = c->dstH;
/* local variable */
cl_int status;
size_t global_work_size[2];
int intermediaSize;
int st = CreateCLBuffer(c,kenv);
if( !st )
{
hb_log( "CreateBuffer[%s] faild %d", "scale_opencl",st );
return -1;
}
intermediaSize = dstStride * srcH + dstChrStride * srcH;
CREATEBUF(c->cl_intermediaBuf, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, intermediaSize*sizeof(cl_short), NULL);
static int init_chr_status = 0;
static cl_kernel chr_kernel;
if(init_chr_status == 0){
if(!(c->flags & 1))
{
chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );
//Set the Kernel Argument;
OCLCHECK(clSetKernelArg,chr_kernel, 2, sizeof(cl_mem), (void*)&c->cl_hLumFilter);
OCLCHECK(clSetKernelArg,chr_kernel, 3, sizeof(cl_mem), (void*)&c->cl_hLumFilterPos);
OCLCHECK(clSetKernelArg,chr_kernel, 4, sizeof(int), (void*)&c->hLumFilterSize);
OCLCHECK(clSetKernelArg,chr_kernel, 5, sizeof(cl_mem), (void*)&c->cl_hChrFilter);
OCLCHECK(clSetKernelArg,chr_kernel, 6, sizeof(cl_mem), (void*)&c->cl_hChrFilterPos);
OCLCHECK(clSetKernelArg,chr_kernel, 7, sizeof(int), (void*)&c->hChrFilterSize);
}
/*Set the arguments*/
OCLCHECK(clSetKernelArg, chr_kernel, 8, sizeof(dstW), (void*)&dstW);
OCLCHECK(clSetKernelArg, chr_kernel, 9, sizeof(srcH), (void*)&srcH);
OCLCHECK(clSetKernelArg, chr_kernel, 10, sizeof(srcW), (void*)&srcW);
OCLCHECK(clSetKernelArg, chr_kernel, 11, sizeof(srcH), (void*)&srcH);
OCLCHECK(clSetKernelArg, chr_kernel, 12, sizeof(dstStride), (void*)&dstStride);
OCLCHECK(clSetKernelArg, chr_kernel, 13, sizeof(dstChrStride), (void*)&dstChrStride);
OCLCHECK(clSetKernelArg, chr_kernel, 14, sizeof(srcStride), (void*)&srcStride);
OCLCHECK(clSetKernelArg, chr_kernel, 15, sizeof(srcChrStride), (void*)&srcChrStride);
init_chr_status = 1;
}
kenv->kernel = chr_kernel;
OCLCHECK(clSetKernelArg, chr_kernel, 0, sizeof(cl_mem), (void*)&c->cl_intermediaBuf);
OCLCHECK(clSetKernelArg, chr_kernel, 1, sizeof(cl_mem), (void*)&c->cl_src);
/*Run the Kernel*/
global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;
global_work_size[1] = c->chrSrcH;
OCLCHECK(clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
static int init_lum_status = 0;
static cl_kernel lum_kernel;
if( init_lum_status == 0 ){
//Vertical:
/*Create Kernel*/
if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );
else
lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );
if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )
{
OCLCHECK(clSetKernelArg, lum_kernel, 2, sizeof(cl_mem), (void*)&c->cl_vLumFilter);
OCLCHECK(clSetKernelArg, lum_kernel, 3, sizeof(int), (void*)&c->vLumFilterSize);
OCLCHECK(clSetKernelArg, lum_kernel, 4, sizeof(cl_mem), (void*)&c->cl_vChrFilter);
OCLCHECK(clSetKernelArg, lum_kernel, 5, sizeof(int), (void*)&c->vChrFilterSize);
OCLCHECK(clSetKernelArg, lum_kernel, 6, sizeof(cl_mem), (void*)&c->cl_vLumFilterPos);
OCLCHECK(clSetKernelArg, lum_kernel, 7, sizeof(cl_mem), (void*)&c->cl_vChrFilterPos);
OCLCHECK(clSetKernelArg, lum_kernel, 8, sizeof(dstW), (void*)&dstW);
OCLCHECK(clSetKernelArg, lum_kernel, 9, sizeof(dstH), (void*)&dstH);
OCLCHECK(clSetKernelArg, lum_kernel, 10, sizeof(srcW), (void*)&srcW);
OCLCHECK(clSetKernelArg, lum_kernel, 11, sizeof(srcH), (void*)&srcH);
OCLCHECK(clSetKernelArg, lum_kernel, 12, sizeof(dstStride), (void*)&dstStride);
OCLCHECK(clSetKernelArg, lum_kernel, 13, sizeof(dstChrStride), (void*)&dstChrStride);
OCLCHECK(clSetKernelArg, lum_kernel, 14, sizeof(dstStride), (void*)&dstStride);
OCLCHECK(clSetKernelArg, lum_kernel, 15, sizeof(dstChrStride), (void*)&dstChrStride);
}
else
{
OCLCHECK(clSetKernelArg, lum_kernel, 2, sizeof(cl_mem), (void*)&c->cl_vLumFilterPos);
OCLCHECK(clSetKernelArg, lum_kernel, 3, sizeof(cl_mem), (void*)&c->cl_vChrFilterPos);
OCLCHECK(clSetKernelArg, lum_kernel, 4, sizeof(dstW), (void*)&dstW);
OCLCHECK(clSetKernelArg, lum_kernel, 5, sizeof(dstH), (void*)&dstH);
OCLCHECK(clSetKernelArg, lum_kernel, 6, sizeof(srcW), (void*)&srcW);
OCLCHECK(clSetKernelArg, lum_kernel, 7, sizeof(srcH), (void*)&srcH);
OCLCHECK(clSetKernelArg, lum_kernel, 8, sizeof(dstStride), (void*)&dstStride);
OCLCHECK(clSetKernelArg, lum_kernel, 9, sizeof(dstChrStride), (void*)&dstChrStride);
OCLCHECK(clSetKernelArg, lum_kernel, 10, sizeof(dstStride), (void*)&dstStride);
OCLCHECK(clSetKernelArg, lum_kernel, 11, sizeof(dstChrStride), (void*)&dstChrStride);
}
init_lum_status = 1;
}
kenv->kernel = lum_kernel;
OCLCHECK(clSetKernelArg, kenv->kernel, 0, sizeof(cl_mem), (void*)&c->cl_dst);
OCLCHECK(clSetKernelArg, kenv->kernel, 1, sizeof(cl_mem), (void*)&c->cl_intermediaBuf);
/*Run the Kernel*/
global_work_size[0] = c->chrDstW;
global_work_size[1] = c->chrDstH;
OCLCHECK(clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL,global_work_size, NULL, 0, NULL, NULL);
clReleaseMemObject( c->cl_intermediaBuf );
return 1;
}
void av_scale_frame(ScaleContext *c, void *dst, void *src, int *srcStride, int *dstStride, int *should_dither)
{
static int regflg = 0;
void *userdata[CL_PARAM_NUM];
userdata[0] = (void *)c;
userdata[1] = (void *)dst;
userdata[2] = (void *)src;
userdata[3] = (void *)srcStride;
userdata[4] = (void *)dstStride;
userdata[5] = (void *)should_dither;
if( regflg==0 )
{
int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func);
if( !st )
{
hb_log( "register kernel[%s] faild %d", "scale_opencl",st );
return;
}
regflg++;
}
if( !hb_run_kernel( "scale_opencl", userdata ))
{
hb_log("run kernel function[%s] faild", "scale_opencl_func" );
return;
}
}
#endif