summaryrefslogtreecommitdiffstats
path: root/libhb/cropscale.c
diff options
context:
space:
mode:
authorsr55 <[email protected]>2013-09-21 20:16:51 +0000
committersr55 <[email protected]>2013-09-21 20:16:51 +0000
commitf69b7f1dfc98c90d454078f1a3aabef3bae36fd2 (patch)
tree4792942c10f6b1c4418228bcc53b648b773d5098 /libhb/cropscale.c
parent8bccfabca28d059978f1eb8e516592f4e2f06c1a (diff)
Merging-in the OpenCL Scaling code from the OpenCL branch to trunk.
Patch originally by the Multicoreware Inc team, followed by improvements and fixes by Micheal Wootton from AMD Inc. OpenCL: This patch implements Bicubic Scaling in OpenCL. Note that HandBrake currently uses Lanczos, so the performance difference appears to be much more significant. We may offer a Bicubic option in software later. Bicubic scaling may appear a bit sharper than the equivalent Lanczos encode and may increase file size a bit. Quality may be better or worse depending on the scaling, the content, and personal preference towards sharpness. When comparing a custom HandBrake build that runs software Bicubic against OpenCL Bicubic, the performance increase is about 5~7% on average on a modern GPU. Hardware Decode via DXVA: We also have optional DXVA decoding, which may be useful for slower/lower-end systems that have a capable GPU. This is only available on input sources that use the libav decode path. Most GPU decoding hardware is designed for playback, so if you are running on a high-end CPU, the GPU decoder will bottleneck the encode process. Requires a GPU supporting OpenCL 1.1 or later. Front end changes and the testing framework are not included in this patch. This will be resolved later. The patch will be revised further before the UI is implemented. git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5792 b64f7644-9d1e-0410-96f1-a4d463321fa5
Diffstat (limited to 'libhb/cropscale.c')
-rw-r--r--libhb/cropscale.c81
1 files changed, 79 insertions, 2 deletions
diff --git a/libhb/cropscale.c b/libhb/cropscale.c
index d217393d1..c7d7d9948 100644
--- a/libhb/cropscale.c
+++ b/libhb/cropscale.c
@@ -9,9 +9,12 @@
#include "hb.h"
#include "hbffmpeg.h"
+#include "common.h"
+
struct hb_filter_private_s
{
+ hb_job_t *job;
int width_in;
int height_in;
int pix_fmt;
@@ -19,6 +22,13 @@ struct hb_filter_private_s
int width_out;
int height_out;
int crop[4];
+
+#ifdef USE_OPENCL
+ int use_dxva;
+ int use_decomb;
+ int use_detelecine;
+ hb_oclscale_t *os; //ocl scaler handler
+#endif
struct SwsContext * context;
};
@@ -53,11 +63,23 @@ static int hb_crop_scale_init( hb_filter_object_t * filter,
hb_filter_private_t * pv = filter->private_data;
// TODO: add pix format option to settings
+ pv->job = init->job;
pv->pix_fmt_out = init->pix_fmt;
pv->width_in = init->width;
pv->height_in = init->height;
pv->width_out = init->width - (init->crop[2] + init->crop[3]);
pv->height_out = init->height - (init->crop[0] + init->crop[1]);
+#ifdef USE_OPENCL
+ pv->use_dxva = init->use_dxva;
+ pv->use_decomb = init->job->use_decomb;
+ pv->use_detelecine = init->job->use_detelecine;
+
+ if( pv->job->use_opencl )
+ {
+ pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
+ memset( pv->os, 0, sizeof( hb_oclscale_t ) );
+ }
+#endif
memcpy( pv->crop, init->crop, sizeof( int[4] ) );
if( filter->settings )
{
@@ -71,6 +93,9 @@ static int hb_crop_scale_init( hb_filter_object_t * filter,
init->width = pv->width_out;
init->height = pv->height_out;
memcpy( init->crop, pv->crop, sizeof( int[4] ) );
+#ifdef USE_OPENCL
+ pv->use_dxva = init->use_dxva;
+#endif
return 0;
}
@@ -111,8 +136,16 @@ static void hb_crop_scale_close( hb_filter_object_t * filter )
{
return;
}
+#ifdef USE_OPENCL
- if ( pv->context )
+ if( pv->job->use_opencl && pv->os )
+ {
+ CL_FREE( pv->os->bicubic_x_weights );
+ CL_FREE( pv->os->bicubic_y_weights );
+ free( pv->os );
+ }
+#endif
+ if( pv->context )
{
sws_freeContext( pv->context );
}
@@ -121,6 +154,25 @@ static void hb_crop_scale_close( hb_filter_object_t * filter )
filter->private_data = NULL;
}
+#ifdef USE_OPENCL
+static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h )
+{ /* Copy h rows of a plane from src to dst.
+   *   dstride / sstride: byte distance between the starts of consecutive
+   *                      rows in dst / src.
+   *   Returns dst advanced by dstride for every row copied.
+   * Rows are copied with memcpy, so src and dst must not overlap. */
+ if( dstride == sstride ) /* equal pitches: copy dstride * h bytes in a single memcpy */
+ {
+ memcpy( dst, src, dstride * h ); /* NOTE(review): h is not range-checked on this path; a negative h would convert to a huge size_t length -- confirm callers always pass h >= 0 */
+ return dst + dstride * h;
+ }
+ int lbytes = dstride <= sstride ? dstride : sstride; /* bytes copied per row = the smaller of the two pitches */
+ while( --h >= 0 ) /* runs h times; no iterations when h <= 0 */
+ {
+ memcpy( dst, src, lbytes );
+ src += sstride; /* each pointer steps by its own pitch */
+ dst += dstride;
+ }
+ return dst;
+}
+#endif
+
static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
{
AVPicture pic_in;
@@ -137,6 +189,15 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
av_picture_crop( &pic_crop, &pic_in, in->f.fmt,
pv->crop[0], pv->crop[2] );
+#ifdef USE_OPENCL
+ // Use bicubic OpenCL scaling when selected and when downsampling < 4:1;
+ if ((pv->job->use_opencl) && (pv->width_out * 4 > pv->width_in) && (in->cl.buffer != NULL) && (out->cl.buffer != NULL))
+ {
+ hb_ocl_scale(in, out, pv->crop, pv->os);
+ }
+ else
+ {
+#endif
if ( !pv->context ||
pv->width_in != in->f.width ||
pv->height_in != in->f.height ||
@@ -164,7 +225,9 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
pic_crop.linesize,
0, in->f.height - (pv->crop[0] + pv->crop[1]),
pic_out.data, pic_out.linesize);
-
+#ifdef USE_OPENCL
+ }
+#endif
out->s = in->s;
hb_buffer_move_subs( out, in );
return out;
@@ -198,6 +261,18 @@ static int hb_crop_scale_work( hb_filter_object_t * filter,
pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]);
pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]);
}
+#ifdef USE_OPENCL
+ if ( (in->f.fmt == pv->pix_fmt_out &&
+ !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
+ in->f.width == pv->width_out && in->f.height == pv->height_out) &&
+ (pv->use_decomb == 0) && (pv->use_detelecine == 0) ||
+ (pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out) )
+ {
+ *buf_out = in;
+ *buf_in = NULL;
+ return HB_FILTER_OK;
+ }
+#else
if ( in->f.fmt == pv->pix_fmt_out &&
!pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] &&
in->f.width == pv->width_out && in->f.height == pv->height_out )
@@ -206,6 +281,8 @@ static int hb_crop_scale_work( hb_filter_object_t * filter,
*buf_in = NULL;
return HB_FILTER_OK;
}
+#endif
+
*buf_out = crop_scale( pv, in );
return HB_FILTER_OK;