diff options
Diffstat (limited to 'libhb/oclscale.c')
-rw-r--r-- | libhb/oclscale.c | 167 |
1 files changed, 99 insertions, 68 deletions
diff --git a/libhb/oclscale.c b/libhb/oclscale.c index da05f371e..eb59eaa66 100644 --- a/libhb/oclscale.c +++ b/libhb/oclscale.c @@ -1,20 +1,19 @@ -/* oclscale.c
-
- Copyright (c) 2003-2012 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/>
- Li Cao <[email protected]> <http://www.multicorewareinc.com/>
-
- */
-
-#ifdef USE_OPENCL
-
+/* oclscale.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: <http://handbrake.fr/>. + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao <[email protected]> <http://www.multicorewareinc.com/> + Li Cao <[email protected]> <http://www.multicorewareinc.com/> + + */ + #include <math.h> #include "common.h" +#include "opencl.h" #include "openclwrapper.h" #define FILTER_LEN 4 @@ -72,14 +71,20 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) int crop_bottom = data[3]; int crop_left = data[4]; int crop_right = data[5]; - cl_int in_frame_w = (int)data[6];
- cl_int in_frame_h = (int)data[7];
- cl_int out_frame_w = (int)data[8];
+ cl_int in_frame_w = (int)data[6]; + cl_int in_frame_h = (int)data[7]; + cl_int out_frame_w = (int)data[8]; cl_int out_frame_h = (int)data[9]; hb_oclscale_t *os = data[10]; hb_buffer_t *in = data[11]; hb_buffer_t *out = data[12]; + if (hb_ocl == NULL) + { + hb_error("hb_ocl_scale_func: OpenCL support not available"); + return 0; + } + if (os->initialized == 0) { hb_log( "Scaling With OpenCL" ); @@ -87,7 +92,7 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) hb_log( "Using Zero Copy"); // create the block kernel cl_int status; - os->m_kernel = clCreateKernel( kenv->program, "frame_scale", &status ); + os->m_kernel = hb_ocl->clCreateKernel(kenv->program, "frame_scale", &status); os->initialized = 1; } @@ -98,8 +103,12 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) int eventCount = 0; if (kenv->isAMD == 0) { - status = clEnqueueUnmapMemObject(kenv->command_queue, in->cl.buffer, in->data, 0, NULL, &events[eventCount++]); - status = clEnqueueUnmapMemObject(kenv->command_queue, out->cl.buffer, out->data, 0, NULL, &events[eventCount++]); + status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, + in->cl.buffer, in->data, 0, + NULL, &events[eventCount++]); + status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, + out->cl.buffer, out->data, 0, + NULL, &events[eventCount++]); } cl_int srcPlaneOffset0 = in->plane[0].data - in->data; @@ -127,28 +136,28 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) cl_float yscale = (out_frame_h * 1.0f) / in_frame_h; setupScaleWeights(xscale, yscale, out_frame_w, out_frame_h, os, kenv); - OCLCHECK( clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf ); - OCLCHECK( clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf ); - OCLCHECK( clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale ); - OCLCHECK( clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale ); - OCLCHECK( clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2 ); - OCLCHECK( clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w );
- OCLCHECK( clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h );
- OCLCHECK( clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w );
- OCLCHECK( clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h ); - OCLCHECK( clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights ); - OCLCHECK( clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights ); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights); + HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights); size_t workOffset[] = { 0, 0, 0 }; size_t globalWorkSize[] = { 1, 1, 1 }; @@ -164,42 +173,70 @@ int hb_ocl_scale_func( void **data, KernelEnv *kenv ) globalWorkSize[1] = ygroups; globalWorkSize[2] = 3; - OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, eventCount, (eventCount == 0) ? NULL : &events[0], &events[eventCount] ); + HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue, + os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, + eventCount, eventCount == 0 ? NULL : &events[0], &events[eventCount]); ++eventCount; if (kenv->isAMD == 0) { - in->data = clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, in->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount], &status); - out->data = clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, out->alloc, (eventCount == 0) ? 0 : 1, (eventCount == 0) ? NULL : &events[eventCount - 1], &events[eventCount + 1], &status); + in->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, + CL_FALSE, CL_MAP_READ|CL_MAP_WRITE, + 0, in->alloc, + eventCount ? 1 : 0, + eventCount ? &events[eventCount - 1] : NULL, + &events[eventCount], &status); + out->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, + CL_FALSE, CL_MAP_READ|CL_MAP_WRITE, + 0, out->alloc, + eventCount ? 1 : 0, + eventCount ? &events[eventCount - 1] : NULL, + &events[eventCount + 1], &status); eventCount += 2; } - clFlush(kenv->command_queue); - clWaitForEvents(eventCount, &events[0]); + hb_ocl->clFlush(kenv->command_queue); + hb_ocl->clWaitForEvents(eventCount, &events[0]); int i; for (i = 0; i < eventCount; ++i) - clReleaseEvent(events[i]); + { + hb_ocl->clReleaseEvent(events[i]); + } } return 1; } -int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) { +int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) +{ cl_int status; - if (os->xscale != xscale || os->width < width) { + + if (hb_ocl == NULL) + { + hb_error("setupScaleWeights: OpenCL support not available"); + return 1; + } + + if (os->xscale != xscale || os->width < width) + { cl_float *xweights = hb_bicubic_weights(xscale, width); - CL_FREE(os->bicubic_x_weights); - CREATEBUF(os->bicubic_x_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * width * 4); - OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL ); + HB_OCL_BUF_FREE (hb_ocl, os->bicubic_x_weights); + HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_x_weights, CL_MEM_READ_ONLY, + sizeof(cl_float) * width * 4); + HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, + CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL); os->width = width; os->xscale = xscale; free(xweights); } - if ((os->yscale != yscale) || (os->height < height)) { + if ((os->yscale != yscale) || (os->height < height)) + { cl_float *yweights = hb_bicubic_weights(yscale, height); - CL_FREE(os->bicubic_y_weights); - CREATEBUF(os->bicubic_y_weights, CL_MEM_READ_ONLY, sizeof(cl_float) * height * 4); - OCLCHECK(clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL ); + HB_OCL_BUF_FREE (hb_ocl, os->bicubic_y_weights); + HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_y_weights, CL_MEM_READ_ONLY, + sizeof(cl_float) * height * 4); + HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, + CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL); os->height = height; os->yscale = yscale; free(yweights); @@ -211,10 +248,10 @@ int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, h /** * function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm * parameter: -* inputFrameBuffer: the source video frame opencl buffer
-* outputdata: the destination video frame buffer
-* inputWidth: the width of the source video frame
-* inputHeight: the height of the source video frame
+* inputFrameBuffer: the source video frame opencl buffer +* outputdata: the destination video frame buffer +* inputWidth: the width of the source video frame +* inputHeight: the height of the source video frame * outputWidth: the width of destination video frame * outputHeight: the height of destination video frame */ @@ -263,9 +300,3 @@ int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os hb_log( "run kernel[%s] failed", "frame_scale" ); return 0; } - - - - - -#endif |