diff options
author | John Stebbins <[email protected]> | 2016-05-17 08:12:33 -0700 |
---|---|---|
committer | John Stebbins <[email protected]> | 2016-05-17 08:12:33 -0700 |
commit | 141579c628e4d74fd6c7d00da0de388bdca7eabd (patch) | |
tree | 5c6d365cb58945bba799c5c891487541d739f3b5 /libhb/oclnv12toyuv.c | |
parent | 17433d4f622a561f1cc4fdef71ced517f9b997b5 (diff) |
remove dxva hw decode (#189)
* remove dxva hw decode
It provides practically no speed increase even on a slow CPU and results
in a speed decrease on fast CPUs. And the code is exceptionally fugly.
* cli: remove USE_HWD
Diffstat (limited to 'libhb/oclnv12toyuv.c')
-rw-r--r-- | libhb/oclnv12toyuv.c | 334 |
1 files changed, 0 insertions, 334 deletions
diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c deleted file mode 100644 index b65039fa0..000000000 --- a/libhb/oclnv12toyuv.c +++ /dev/null @@ -1,334 +0,0 @@ -/* oclnv12toyuv.c - - Copyright (c) 2003-2016 HandBrake Team - This file is part of the HandBrake source code - Homepage: <http://handbrake.fr/>. - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> - Li Cao <[email protected]> <http://www.multicorewareinc.com/> - */ - -#ifdef USE_HWD - -#include "opencl.h" -#include "vadxva2.h" -#include "oclnv12toyuv.h" - -/** - * It creates are opencl bufs w is input frame width, h is input frame height -*/ -static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); - -/** - * It creates are opencl kernel. kernel name is nv12toyuv -*/ -static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ); - -/** - * It set opencl arg, input data,output data, input width, output height -*/ -static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); - -/** - * It initialize nv12 to yuv kernel. -*/ -static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); - -/** - * Run nv12 to yuv kernel. - */ -static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ); - -/** - * register nv12 to yuv kernel. - */ -static int hb_nv12toyuv_reg_kernel( void ); - -/** - * It creates are opencl bufs w is input frame width, h is input frame height - */ -static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) -{ - if (hb_ocl == NULL) - { - hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available"); - return 1; - } - - cl_int status = CL_SUCCESS; - int in_bytes = w*h*3/2; - HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_nv12, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, in_bytes); - HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_yuv, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, in_bytes); - return 0; -} - -/** - * It creates are opencl kernel. kernel name is nv12toyuv - */ -static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ) -{ - if (hb_ocl == NULL) - { - hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available"); - return 1; - } - - int ret; - dxva2->nv12toyuv = hb_ocl->clCreateKernel(kenv->program, "nv12toyuv", &ret); - return ret; -} - -/** - * It set opencl arg, input data,output data, input width, output height - */ -static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) -{ - int arg = 0, status; - kenv->kernel = dxva2->nv12toyuv; - - if (hb_ocl == NULL) - { - hb_error("hb_nv12toyuv_setkernelarg: OpenCL support not available"); - return 1; - } - - HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h); - return 0; -} - -/** - * It initialize nv12 to yuv kernel. - */ -static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) -{ - if( !dxva2->nv12toyuv ) - { - if( hb_nv12toyuv_create_cl_buf( kenv, w, h, dxva2 ) ) - { - hb_log( "OpenCL: nv12toyuv_create_cl_buf fail" ); - return -1; - } - if (!dxva2->nv12toyuv_tmp_in) - { - dxva2->nv12toyuv_tmp_in = malloc (w*h*3/2); - } - - if (!dxva2->nv12toyuv_tmp_out) - { - dxva2->nv12toyuv_tmp_out = malloc (w*h*3/2); - } - - hb_nv12toyuv_create_cl_kernel( kenv, dxva2 ); - } - return 0; -} - -/** - * copy_plane - * @param dst - - * @param src - - * @param dstride - - * @param sstride - - * @param h - - */ -static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, - int h ) -{ - if ( dstride == sstride ) - { - memcpy( dst, src, dstride * h ); - return dst + dstride * h; - } - - int lbytes = dstride <= sstride? dstride : sstride; - while ( --h >= 0 ) - { - memcpy( dst, src, lbytes ); - src += sstride; - dst += dstride; - } - - return dst; -} - -/** - * Run nv12 to yuv kernel. - */ -static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) -{ - int status; - int w = (int)userdata[0]; - int h = (int)userdata[1]; - uint8_t *bufi1 = userdata[2]; - int *crop = userdata[3]; - hb_va_dxva2_t *dxva2 = userdata[4]; - - uint8_t *bufi2 = userdata[5]; - int p = (int)userdata[6]; - int decomb = (int)userdata[7]; - int detelecine = (int)userdata[8]; - int i; - if( hb_init_nv12toyuv_ocl( kenv, w, h, dxva2 ) ) - { - return -1; - } - - if( hb_nv12toyuv_setkernelarg( kenv, w, h, dxva2 ) ) - { - return -1; - } - - if (hb_ocl == NULL) - { - hb_error("hb_nv12toyuv: OpenCL support not available"); - return -1; - } - - int in_bytes = w*h*3/2; - if( kenv->isAMD ) - { - void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, - dxva2->cl_mem_nv12, - CL_MAP_WRITE_INVALIDATE_REGION, - CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL); - - for ( i = 0; i < dxva2->height; i++ ) - { - memcpy( data + i * dxva2->width, bufi1 + i * p, dxva2->width ); - if ( i < dxva2->height >> 1 ) - { - memcpy( data + ( dxva2->width * dxva2->height ) + i * dxva2->width, bufi2 + i * p, dxva2->width ); - } - } - hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, dxva2->cl_mem_nv12, - data, 0, NULL, NULL); - } - else - { - uint8_t *tmp = (uint8_t*)malloc( dxva2->width * dxva2->height * 3 / 2 ); - for( i = 0; i < dxva2->height; i++ ) - { - memcpy( tmp + i * dxva2->width, bufi1 + i * p, dxva2->width ); - if( i < dxva2->height >> 1 ) - { - memcpy( tmp + (dxva2->width * dxva2->height) + i * dxva2->width, bufi2 + i * p, dxva2->width ); - } - } - HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, - dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL); - free( tmp ); - } - - size_t gdim[2] = {w>>1, h>>1}; - HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue, - kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL ); - - if ((crop[0] || crop[1] || crop[2] || crop[3]) && - (decomb == 0) && (detelecine == 0)) - { - uint8_t * crop_data[4]; - int crop_stride[4]; - - hb_ocl->clEnqueueReadBuffer(kenv->command_queue, dxva2->cl_mem_yuv, - CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out, - 0, NULL, NULL); - hb_buffer_t *in = hb_video_buffer_init( w, h ); - - int wmp = in->plane[0].stride; - int hmp = in->plane[0].height; - copy_plane(in->plane[0].data, dxva2->nv12toyuv_tmp_out, wmp, w, hmp); - wmp = in->plane[1].stride; - hmp = in->plane[1].height; - copy_plane(in->plane[1].data, dxva2->nv12toyuv_tmp_out + w * h, - wmp, w >> 1, hmp); - wmp = in->plane[2].stride; - hmp = in->plane[2].height; - copy_plane(in->plane[2].data, dxva2->nv12toyuv_tmp_out + w * h + - ((w * h) >> 2), wmp, w>>1, hmp); - - hb_picture_crop(crop_data, crop_stride, in, crop[0], crop[2]); - int i, ww = w - ( crop[2] + crop[3] ), hh = h - ( crop[0] + crop[1] ); - for( i = 0; i< hh >> 1; i++ ) - { - memcpy( dxva2->nv12toyuv_tmp_in + ((i << 1) + 0) * ww, - crop_data[0]+ ((i << 1) + 0) * crop_stride[0], ww ); - memcpy( dxva2->nv12toyuv_tmp_in + ((i << 1) + 1) * ww, - crop_data[0]+ ((i << 1) + 1) * crop_stride[0], ww ); - memcpy( dxva2->nv12toyuv_tmp_in + (ww * hh) + i * (ww >> 1), - crop_data[1] + i * crop_stride[1], ww >> 1 ); - memcpy( dxva2->nv12toyuv_tmp_in + (ww * hh) + ((ww * hh) >> 2) + - i * (ww >> 1), - crop_data[2] + i * crop_stride[2], ww >> 1 ); - } - - if( kenv->isAMD ) - { - void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, - dxva2->cl_mem_yuv, - CL_MAP_WRITE_INVALIDATE_REGION, - CL_TRUE, 0, ww * hh * 3 / 2, 0, - NULL, NULL, NULL); - memcpy( data, dxva2->nv12toyuv_tmp_in, ww * hh * 3 / 2 ); - hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, - dxva2->cl_mem_yuv, data, 0, NULL, NULL); - } - else - { - HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, - dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, - dxva2->nv12toyuv_tmp_in, 0, NULL, NULL); - } - - hb_buffer_close( &in ); - } - return 0; -} -/** - * register nv12 to yuv kernel. - */ -static int hb_nv12toyuv_reg_kernel( void ) -{ - int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv ); - if( !st ) - { - hb_log( "OpenCL: register kernel[%s] failed", "nv12toyuv" ); - return -1; - } - return 0; -} -/** - * nv12 to yuv interface - * bufi is input frame of nv12, w is input frame width, h is input frame height - */ -int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2, int decomb, int detelecine ) -{ - void *userdata[9]; - userdata[0] = (void*)w; - userdata[1] = (void*)h; - userdata[2] = bufi[0]; - userdata[3] = crop; - userdata[4] = dxva2; - userdata[5] = bufi[1]; - userdata[6] = (void*)p; - userdata[7] = decomb; - userdata[8] = detelecine; - - if( hb_nv12toyuv_reg_kernel() ) - { - return -1; - } - - if( hb_run_kernel( "nv12toyuv", userdata ) ) - { - hb_log( "OpenCL: run kernel[nv12toyuv] failed" ); - return -1; - } - return 0; -} - -#endif // USE_HWD |