diff options
author | sr55 <[email protected]> | 2013-09-21 20:16:51 +0000 |
---|---|---|
committer | sr55 <[email protected]> | 2013-09-21 20:16:51 +0000 |
commit | f69b7f1dfc98c90d454078f1a3aabef3bae36fd2 (patch) | |
tree | 4792942c10f6b1c4418228bcc53b648b773d5098 /libhb/fifo.c | |
parent | 8bccfabca28d059978f1eb8e516592f4e2f06c1a (diff) |
Merging-in the OpenCL Scaling code from the OpenCL branch to trunk.
Patch originally by the Multicoreware Inc team, followed by improvements and fixes by Micheal Wootton from AMD Inc.
OpenCL:
This patch implements Bicubic Scaling in OpenCL.
Note that HandBrake currently uses Lanczos scaling in software, so the performance difference compared with OpenCL Bicubic appears to be much more significant. We may offer a software Bicubic option later.
Bicubic scaling may appear a bit sharper than the equivalent Lanczos encode and may increase file size a bit. Quality may be better or worse depending on the scaling and content and personal preference towards sharpness.
When comparing a custom HandBrake build that runs software Bicubic against OpenCL Bicubic, the performance increase is about 5~7% on average on a modern GPU.
Hardware Decode via DXVA:
We also have optional DXVA decoding, which may be useful for slower/lower-end systems that have a capable GPU.
This is only available on input sources that use the libav decode path.
Most GPU decoding hardware is designed for playback, so if you are running on a high-end CPU, hardware decoding will bottleneck the encode process.
Requires a GPU supporting OpenCL 1.1 or later.
Front end changes and testing framework are not included in this patch. This will be resolved later.
Patch will be revised further before the UI is implemented.
git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5792 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb/fifo.c')
-rw-r--r-- | libhb/fifo.c | 83 |
1 files changed, 79 insertions, 4 deletions
diff --git a/libhb/fifo.c b/libhb/fifo.c index be69616f1..bd1a45e40 100644 --- a/libhb/fifo.c +++ b/libhb/fifo.c @@ -242,7 +242,14 @@ void hb_buffer_pool_free( void ) if( b->data ) { freed += b->alloc; - free( b->data ); +#ifdef USE_OPENCL + if (b->cl.buffer != NULL) { + if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0) + hb_log("bad free: %.16x -> buffer %.16x map %.16x", b, b->cl.buffer, b->data); + } + else +#endif + free( b->data ); } free( b ); count++; @@ -273,7 +280,7 @@ static hb_fifo_t *size_to_pool( int size ) return NULL; } -hb_buffer_t * hb_buffer_init( int size ) +hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) { hb_buffer_t * b; // Certain libraries (hrm ffmpeg) expect buffers passed to them to @@ -288,6 +295,18 @@ hb_buffer_t * hb_buffer_init( int size ) { b = hb_fifo_get( buffer_pool ); +#ifdef USE_OPENCL + if (b && (needsMapped != 0) && (b->cl.buffer == NULL)) + { + // We need a mapped OpenCL buffer and that is not what we got out of the pool. + // Ditch it. It will get replaced with what we need. + if (b->data) + free(b->data); + free(b); + b = NULL; + } +#endif + if( b ) { /* @@ -295,6 +314,12 @@ hb_buffer_t * hb_buffer_init( int size ) * didn't have to do this. 
*/ uint8_t *data = b->data; +#ifdef USE_OPENCL + cl_mem buffer = b->cl.buffer; + cl_event last_event = b->cl.last_event; + int loc = b->cl.buffer_location; +#endif + memset( b, 0, sizeof(hb_buffer_t) ); b->alloc = buffer_pool->buffer_size; b->size = size; @@ -302,6 +327,11 @@ hb_buffer_t * hb_buffer_init( int size ) b->s.start = -1; b->s.stop = -1; b->s.renderOffset = -1; +#ifdef USE_OPENCL + b->cl.buffer = buffer; + b->cl.last_event = last_event; + b->cl.buffer_location = loc; +#endif return( b ); } } @@ -320,6 +350,20 @@ hb_buffer_t * hb_buffer_init( int size ) if (size) { +#ifdef USE_OPENCL + b->cl.last_event = NULL; + b->cl.buffer_location = HOST; + + if (needsMapped != 0) + { + int status; + status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc); + //hb_log("buf: %.16x -> buffer %.16x map %.16x size %d", b, b->cl.buffer, b->data, size); + } + else { + b->cl.buffer = NULL; +#endif + #if defined( SYS_DARWIN ) || defined( SYS_FREEBSD ) || defined( SYS_MINGW ) b->data = malloc( b->alloc ); #elif defined( SYS_CYGWIN ) @@ -328,6 +372,10 @@ hb_buffer_t * hb_buffer_init( int size ) #else b->data = memalign( 16, b->alloc ); #endif +#ifdef USE_OPENCL + } +#endif + if( !b->data ) { hb_log( "out of memory" ); @@ -344,6 +392,11 @@ hb_buffer_t * hb_buffer_init( int size ) return b; } +hb_buffer_t * hb_buffer_init( int size ) +{ + return hb_buffer_init_internal(size, 0); +} + void hb_buffer_realloc( hb_buffer_t * b, int size ) { if ( size > b->alloc || b->data == NULL ) @@ -361,6 +414,7 @@ void hb_buffer_realloc( hb_buffer_t * b, int size ) void hb_buffer_reduce( hb_buffer_t * b, int size ) { + if ( size < b->alloc / 8 || b->data == NULL ) { hb_buffer_t * tmp = hb_buffer_init( size ); @@ -374,6 +428,7 @@ void hb_buffer_reduce( hb_buffer_t * b, int size ) hb_buffer_t * hb_buffer_dup( const hb_buffer_t * src ) { + hb_buffer_t * buf; if ( src == NULL ) @@ -470,8 +525,11 @@ hb_buffer_t * hb_frame_buffer_init( int pix_fmt, int width, int height ) 
hb_image_height_stride( pix_fmt, height, p ); } } - +#ifdef USE_OPENCL + buf = hb_buffer_init_internal( size , hb_use_buffers() ); +#else buf = hb_buffer_init( size ); +#endif if( buf == NULL ) return NULL; @@ -524,12 +582,22 @@ void hb_buffer_swap_copy( hb_buffer_t *src, hb_buffer_t *dst ) uint8_t *data = dst->data; int size = dst->size; int alloc = dst->alloc; +#ifdef USE_OPENCL + cl_mem buffer = dst->cl.buffer; + cl_event last_event = dst->cl.last_event; + int loc = dst->cl.buffer_location; +#endif *dst = *src; src->data = data; src->size = size; src->alloc = alloc; +#ifdef USE_OPENCL + src->cl.buffer = buffer; + src->cl.last_event = last_event; + src->cl.buffer_location = loc; +#endif } // Frees the specified buffer list. @@ -557,7 +625,14 @@ void hb_buffer_close( hb_buffer_t ** _b ) // free the buf if( b->data ) { - free( b->data ); +#ifdef USE_OPENCL + if (b->cl.buffer != NULL) { + if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0) + hb_log("bad free2: %.16x -> buffer %.16x map %.16x", b, b->cl.buffer, b->data); + } + else +#endif + free( b->data ); hb_lock(buffers.lock); buffers.allocated -= b->alloc; hb_unlock(buffers.lock); |