diff options
author | sr55 <[email protected]> | 2013-09-21 20:16:51 +0000 |
---|---|---|
committer | sr55 <[email protected]> | 2013-09-21 20:16:51 +0000 |
commit | f69b7f1dfc98c90d454078f1a3aabef3bae36fd2 (patch) | |
tree | 4792942c10f6b1c4418228bcc53b648b773d5098 /libhb/common.h | |
parent | 8bccfabca28d059978f1eb8e516592f4e2f06c1a (diff) |
Merging-in the OpenCL Scaling code from the OpenCL branch to trunk.
Patch originally by the Multicoreware Inc team, followed by improvements and fixes by Micheal Wootton from AMD Inc,
OpenCL:
This patch implements Bicubic Scaling in OpenCL.
Note that HandBrake currently uses Lanczos so the performance difference appears to be much more significant. We may offer an option of BiCubic in software later.
Bicubic scaling may appear a bit sharper than the equivalent Lanczos encode and may increase file size a bit. Quality may be better or worse depending on the scaling and content and personal preference towards sharpness.
When comparing performance with a custom HandBrake build that runs Software Bicubic to OpenCL Bicubic, performance increase is about 5~7% on average on a modern GPU.
Hardware Decode via DXVA:
We also have optional DXVA decoding which may come in useful for slower/lower end systems that have a capable GPU.
This is only available on input sources that use the libav decode path.
Most GPU hardware for decoding is designed for playback, so if you are running on a high end CPU, it will bottleneck the encode process.
Requires OpenCL 1.1 or later supporting GPU.
Front end changes and testing framework are not included in this patch. This will be resolved later.
Patch will be revised further before the UI is implemented.
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5792 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb/common.h')
-rw-r--r-- | libhb/common.h | 70 |
1 files changed, 68 insertions, 2 deletions
diff --git a/libhb/common.h b/libhb/common.h index 19c1049cd..abc799ef1 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -20,7 +20,13 @@ #include <sys/types.h> #include <sys/stat.h> #include <dirent.h> - +#ifdef USE_OPENCL +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif +#endif /* * It seems WinXP doesn't align the stack of new threads to 16 bytes. * To prevent crashes in SSE functions, we need to force stack alignement @@ -154,6 +160,7 @@ int hb_subtitle_can_burn( int source ); int hb_subtitle_can_pass( int source, int mux ); hb_attachment_t *hb_attachment_copy(const hb_attachment_t *src); + hb_list_t *hb_attachment_list_copy(const hb_list_t *src); void hb_attachment_close(hb_attachment_t **attachment); @@ -353,6 +360,8 @@ struct hb_title_set_s int feature; // Detected DVD feature title }; +extern int hb_gui_use_hwd_flag; + /****************************************************************************** * hb_job_t: settings to be filled by the UI * Update win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs when changing this struct @@ -370,7 +379,7 @@ struct hb_job_s int chapter_start; int chapter_end; - /* Include chapter marker track in mp4? */ + /* Include chapter marker track in mp4? */ int chapter_markers; /* Picture settings: @@ -518,6 +527,10 @@ struct hb_job_s uint32_t frames_to_skip; // decode but discard this many frames // initially (for frame accurate positioning // to non-I frames). + int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */ + int use_hwd; + int use_decomb; + int use_detelecine; #ifdef USE_QSV // QSV-specific settings struct @@ -895,6 +908,8 @@ struct hb_title_s uint32_t flags; // set if video stream doesn't have IDR frames + int opencl_support; + int hwd_support; #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 0) }; @@ -1070,6 +1085,52 @@ extern hb_work_object_t hb_reader; #define HB_FILTER_DROP 3 #define HB_FILTER_DONE 4 +typedef struct hb_oclscale_s +{ +#ifdef USE_OPENCL + int initialized; + // bicubic scale weights + cl_mem bicubic_x_weights; + cl_mem bicubic_y_weights; + cl_float xscale; + cl_float yscale; + int width; + int height; + // horizontal scaling and vertical scaling kernel handle + cl_kernel m_kernel; + int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem +#endif +} hb_oclscale_t; + +#ifdef USE_OPENCL +int hb_ocl_scale( hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os ); +#endif + +#ifdef USE_OPENCL +int hb_use_dxva( hb_title_t * title ); +// create opencl buffer +#define CREATEBUF( out, flags, size )\ + {\ + out = clCreateBuffer( kenv->context, (flags), (size), NULL, &status );\ + if( CL_SUCCESS != status ) return -1;\ + } + +#define OCLCHECK( method, ... )\ + status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\ + printf( # method " error '%d'\n", status ); return status; } + +#define CL_FREE( buf )\ +{\ + if( buf )\ + {\ + { clReleaseMemObject( buf ); }\ + buf = NULL;\ + }\ +} + + +#endif + typedef struct hb_filter_init_s { hb_job_t * job; @@ -1082,6 +1143,10 @@ typedef struct hb_filter_init_s int vrate_base; int vrate; int cfr; +#ifdef USE_OPENCL + int use_dxva; +#endif + } hb_filter_init_t; typedef struct hb_filter_info_s @@ -1140,6 +1205,7 @@ enum HB_FILTER_DENOISE, HB_FILTER_RENDER_SUB, HB_FILTER_CROP_SCALE, + // Finally filters that don't care what order they are in, // except that they must be after the above filters HB_FILTER_ROTATE, |