1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
|
/* scale.h
Copyright (c) 2003-2012 HandBrake Team
This file is part of the HandBrake source code
Homepage: <http://handbrake.fr/>.
It may be used under the terms of the GNU General Public License v2.
For full terms see the file COPYING or visit http://www.gnu.org/licenses/gpl-2.0.html
Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
*/
#ifndef SCALE_H
#define SCALE_H
#ifdef USE_OPENCL
#include <stdint.h>
#include "vadxva2.h"
#include "libavutil/pixfmt.h"
#include "hbffmpeg.h"
/* Padding term for the ScaleContext table_rV/table_gU/table_gV/table_bU
 * arrays, each of which is sized 256 + 2*YUVRGB_TABLE_HEADROOM entries.
 * NOTE(review): presumably the headroom is split evenly before and after the
 * 256 nominal entries -- confirm against the code that fills the tables. */
#define YUVRGB_TABLE_HEADROOM 128
/* Sizing constant for the lumMmxFilter/chrMmxFilter/alpMmxFilter arrays in
 * ScaleContext, each declared with 4 * MAX_FILTER_SIZE int32_t elements. */
#define MAX_FILTER_SIZE 256
/*
 * Bit-depth predicates keyed on a pixel-format index `x`.
 * Both look only at component 0 of av_pix_fmt_descriptors[x] and compare its
 * depth_minus1 field: 15 for is16BPS, 8 or 9 for is9_OR_10BPS.
 */
#define is16BPS(x) \
    (15 == av_pix_fmt_descriptors[x].comp[0].depth_minus1)
#define is9_OR_10BPS(x) \
    (8 == av_pix_fmt_descriptors[x].comp[0].depth_minus1 || \
     9 == av_pix_fmt_descriptors[x].comp[0].depth_minus1)
/*
 * APCK_* constants, chosen by whether ARCH_X86_64 evaluates to non-zero.
 * In both variants APCK_COEF is twice APCK_PTR2 and APCK_SIZE is their sum.
 * NOTE(review): presumably byte offsets/size of a packed pointer+coefficient
 * record used by assembly code -- confirm against the users of these macros.
 */
#if !ARCH_X86_64
#   define APCK_PTR2 4
#   define APCK_COEF 8
#   define APCK_SIZE 16
#else
#   define APCK_PTR2 8
#   define APCK_COEF 16
#   define APCK_SIZE 24
#endif
/**
 * Function-pointer type of the ScaleContext::yuv2plane1 hook.
 * Receives a single int16_t source line, an 8-bit destination, the
 * destination width, a dither table and an offset.
 */
typedef void (*yuv2planar1_fn)(
    const int16_t *src,
    uint8_t *dest,
    int dstW,
    const uint8_t *dither,
    int offset);

/**
 * Function-pointer type of the ScaleContext::yuv2planeX hook.
 * Same as yuv2planar1_fn except that it is given a filter of `filterSize`
 * coefficients and an array of source line pointers instead of one line.
 */
typedef void (*yuv2planarX_fn)(
    const int16_t *filter,
    int filterSize,
    const int16_t **src,
    uint8_t *dest,
    int dstW,
    const uint8_t *dither,
    int offset);
/*
 * Forward declaration of the scaler context.
 *
 * Without it, the first mention of `struct ScaleContext` would sit inside a
 * parameter list, where C gives the tag prototype scope: each typedef would
 * then refer to its own private incomplete type, incompatible with the
 * ScaleContext defined later in this header. Declaring the tag at file scope
 * makes every typedef below (and the struct definition) share one type.
 */
struct ScaleContext;

/**
 * Function-pointer type of the ScaleContext::yuv2nv12cX hook.
 * Receives the context, a chroma filter with its size, the U and V source
 * line arrays, the destination buffer and the destination width.
 */
typedef void (*yuv2interleavedX_fn)(struct ScaleContext *c,
                                    const int16_t *chrFilter,
                                    int chrFilterSize,
                                    const int16_t **chrUSrc,
                                    const int16_t **chrVSrc,
                                    uint8_t *dest, int dstW);

/**
 * Function-pointer type of the ScaleContext::yuv2packed1 hook.
 * Takes one luma line, two-entry U/V line arrays and one alpha line.
 */
typedef void (*yuv2packed1_fn)(struct ScaleContext *c,
                               const int16_t *lumSrc,
                               const int16_t *chrUSrc[2],
                               const int16_t *chrVSrc[2],
                               const int16_t *alpSrc, uint8_t *dest,
                               int dstW, int uvalpha, int y);

/**
 * Function-pointer type of the ScaleContext::yuv2packed2 hook.
 * Takes two-entry line arrays for luma, U, V and alpha plus the yalpha and
 * uvalpha values.
 * The context parameter previously used the misspelled tag `SCaleContext`,
 * which named an unrelated type; it now matches the other hooks.
 */
typedef void (*yuv2packed2_fn)(struct ScaleContext *c,
                               const int16_t *lumSrc[2],
                               const int16_t *chrUSrc[2],
                               const int16_t *chrVSrc[2],
                               const int16_t *alpSrc[2],
                               uint8_t *dest,
                               int dstW, int yalpha, int uvalpha, int y);

/**
 * Function-pointer type of the ScaleContext::yuv2packedX hook.
 * Takes luma and chroma filters with their sizes and the matching arrays of
 * source lines (luma, U, V, alpha).
 * The context parameter previously used the misspelled tag `SCaleContext`.
 */
typedef void (*yuv2packedX_fn)(struct ScaleContext *c,
                               const int16_t *lumFilter,
                               const int16_t **lumSrc, int lumFilterSize,
                               const int16_t *chrFilter,
                               const int16_t **chrUSrc,
                               const int16_t **chrVSrc, int chrFilterSize,
                               const int16_t **alpSrc, uint8_t *dest,
                               int dstW, int y);

/**
 * Function-pointer type of the ScaleContext::swScale entry point.
 * Receives the context, source plane pointers and strides, the first line
 * and height of the source slice, and destination plane pointers and
 * strides; returns an int whose meaning is defined by the implementation.
 */
typedef int (*SwsFunc)(struct ScaleContext *context, const uint8_t *src[],
                       int srcStride[], int srcSliceY, int srcSliceH,
                       uint8_t *dst[], int dstStride[]);
/** A list of double-precision coefficients together with its length. */
typedef struct {
    /** Pointer to the list of coefficients. */
    double *coeff;
    /** Number of coefficients in the vector. */
    int length;
} ScaleVector;
/**
 * Bundle of four ScaleVector pointers.
 * NOTE(review): by naming these look like the luma (lum*) and chroma (chr*)
 * vectors for the horizontal (H) and vertical (V) directions -- confirm
 * against the code that fills this struct.
 */
typedef struct {
    ScaleVector *lumH, *lumV;
    ScaleVector *chrH, *chrV;
} ScaleFilter;
/**
 * Scaler context: source/destination geometry and pixel formats, line ring
 * buffers, filter coefficient tables, colorspace settings, per-stage function
 * pointers and, at the end, a set of cl_mem handles.
 *
 * NOTE(review): the string macros defined in the middle of this struct
 * (RED_DITHER ... DITHER32) read as byte offsets of the members from
 * redDither through dither32, and DSTW_OFFSET is flagged as hardcoded in the
 * ASM. Treat the order and size of the members in that region as fixed.
 */
typedef struct ScaleContext {
SwsFunc swScale;
int srcW; ///< Width of source luma/alpha planes.
int srcH; ///< Height of source luma/alpha planes.
int dstH; ///< Height of destination luma/alpha planes.
int chrSrcW; ///< Width of source chroma planes.
int chrSrcH; ///< Height of source chroma planes.
int chrDstW; ///< Width of destination chroma planes.
int chrDstH; ///< Height of destination chroma planes.
int lumXInc, chrXInc;
int lumYInc, chrYInc;
enum PixelFormat dstFormat; ///< Destination pixel format.
enum PixelFormat srcFormat; ///< Source pixel format.
int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
int dstBpc, srcBpc;
int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
double param[2]; ///< Input parameters for scaling algorithms that need them.
uint32_t pal_yuv[256];
uint32_t pal_rgb[256];
int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
uint8_t *formatConvBuffer;
int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
int vChrFilterSize; ///< Vertical filter size for chroma pixels.
int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
int canMMX2BeUsed;
unsigned char *dest;
unsigned char *source;
int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void *yuvTable; ///< Pointer to the yuv->rgb table start so it can be freed.
uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
//Colorspace stuff
int contrast, brightness, saturation; // for sws_getColorspaceDetails
int srcColorspaceTable[4];
int dstColorspaceTable[4];
int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
int src0Alpha;
int dst0Alpha;
int yuv2rgb_y_offset;
int yuv2rgb_y_coeff;
int yuv2rgb_v2r_coeff;
int yuv2rgb_v2g_coeff;
int yuv2rgb_u2g_coeff;
int yuv2rgb_u2b_coeff;
/*
 * String-valued offset expressions for the members that follow, counted from
 * redDither (offset "0*8"). The arithmetic matches the declarations below:
 * eleven 8-byte values, then the 4*MAX_FILTER_SIZE int32_t filter arrays,
 * dstW, and so on.
 * NOTE(review): presumably pasted into inline assembly operands -- keep these
 * strings and the member order/sizes in sync.
 */
#define RED_DITHER "0*8"
#define GREEN_DITHER "1*8"
#define BLUE_DITHER "2*8"
#define Y_COEFF "3*8"
#define VR_COEFF "4*8"
#define UB_COEFF "5*8"
#define VG_COEFF "6*8"
#define UG_COEFF "7*8"
#define Y_OFFSET "8*8"
#define U_OFFSET "9*8"
#define V_OFFSET "10*8"
#define LUM_MMX_FILTER_OFFSET "11*8"
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
#define ESP_OFFSET "11*8+4*4*256*2+8"
#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
#define U_TEMP "11*8+4*4*256*2+24"
#define V_TEMP "11*8+4*4*256*2+32"
#define Y_TEMP "11*8+4*4*256*2+40"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF_PX "11*8+4*4*256*3+48"
#define UV_OFF_BYTE "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+80"
DECLARE_ALIGNED(8, uint64_t, redDither);
DECLARE_ALIGNED(8, uint64_t, greenDither);
DECLARE_ALIGNED(8, uint64_t, blueDither);
DECLARE_ALIGNED(8, uint64_t, yCoeff);
DECLARE_ALIGNED(8, uint64_t, vrCoeff);
DECLARE_ALIGNED(8, uint64_t, ubCoeff);
DECLARE_ALIGNED(8, uint64_t, vgCoeff);
DECLARE_ALIGNED(8, uint64_t, ugCoeff);
DECLARE_ALIGNED(8, uint64_t, yOffset);
DECLARE_ALIGNED(8, uint64_t, uOffset);
DECLARE_ALIGNED(8, uint64_t, vOffset);
int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
// dstW sits here, directly after chrMmxFilter, which is the position DSTW_OFFSET describes.
int dstW; ///< Width of destination luma/alpha planes.
DECLARE_ALIGNED(8, uint64_t, esp);
DECLARE_ALIGNED(8, uint64_t, vRounder);
DECLARE_ALIGNED(8, uint64_t, u_temp);
DECLARE_ALIGNED(8, uint64_t, v_temp);
DECLARE_ALIGNED(8, uint64_t, y_temp);
int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
DECLARE_ALIGNED(8, uint16_t, dither16)[8];
DECLARE_ALIGNED(8, uint32_t, dither32)[8];
const uint8_t *chrDither8, *lumDither8;
// The three groups below exist only when the corresponding build macro is non-zero.
#if HAVE_ALTIVEC
vector signed short CY;
vector signed short CRV;
vector signed short CBU;
vector signed short CGU;
vector signed short CGV;
vector signed short OY;
vector unsigned short CSHIFT;
vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
#if ARCH_BFIN
DECLARE_ALIGNED(4, uint32_t, oy);
DECLARE_ALIGNED(4, uint32_t, oc);
DECLARE_ALIGNED(4, uint32_t, zero);
DECLARE_ALIGNED(4, uint32_t, cy);
DECLARE_ALIGNED(4, uint32_t, crv);
DECLARE_ALIGNED(4, uint32_t, rmask);
DECLARE_ALIGNED(4, uint32_t, cbu);
DECLARE_ALIGNED(4, uint32_t, bmask);
DECLARE_ALIGNED(4, uint32_t, cgu);
DECLARE_ALIGNED(4, uint32_t, cgv);
DECLARE_ALIGNED(4, uint32_t, gmask);
#endif
#if HAVE_VIS
DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
#endif
int use_mmx_vfilter;
/* function pointers for swScale() */
yuv2planar1_fn yuv2plane1;
yuv2planarX_fn yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX;
yuv2packed1_fn yuv2packed1;
yuv2packed2_fn yuv2packed2;
yuv2packedX_fn yuv2packedX;
/// Unscaled conversion of luma plane to YV12 for horizontal scaler.
void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
int width);
/*
 * NOTE(review): the four hooks below take `struct SwsContext *`, not a
 * pointer to this ScaleContext. Confirm whether that is intentional or a
 * leftover that should name ScaleContext.
 */
void (*hyscale_fast)(struct SwsContext *c,
int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc);
void (*hcscale_fast)(struct SwsContext *c,
int16_t *dst1, int16_t *dst2, int dstWidth,
const uint8_t *src1, const uint8_t *src2,
int srcW, int xInc);
void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
void (*lumConvertRange)(int16_t *dst, int width);
void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
int needs_hcscale; ///< Set if there are chroma planes to be converted.
/*
 * cl_mem handles. Eight of them carry the names of the host filter and
 * filter-position arrays above with a cl_ prefix.
 * NOTE(review): presumably device-side copies of those arrays plus
 * intermediate/source/destination buffers -- confirm in the implementation.
 */
cl_mem cl_hLumFilter;
cl_mem cl_hLumFilterPos;
cl_mem cl_hChrFilter;
cl_mem cl_hChrFilterPos;
cl_mem cl_vLumFilter;
cl_mem cl_vLumFilterPos;
cl_mem cl_vChrFilter;
cl_mem cl_vChrFilterPos;
cl_mem cl_intermediaBuf;
cl_mem cl_src;
cl_mem cl_dst;
} ScaleContext;
/*
 * Scaler set-up entry point; defined outside this header.
 * NOTE(review): the four int parameters are unnamed, so their meaning and
 * order cannot be read from this declaration -- name them to match the
 * definition.
 */
void scale_init(int, int, int, int);
/*
 * Scaler tear-down entry point; defined outside this header.
 * Declared with (void) so this is a true prototype: before C23 an empty
 * parameter list leaves the arguments unchecked, and a call with stray
 * arguments would compile silently.
 */
void scale_release(void);
/*
 * Scaling entry point; defined outside this header.
 * Takes an input and an output cl_mem buffer, two line sizes and a height,
 * and returns an int.
 * NOTE(review): by naming, linesizey/linesizeuv look like the line sizes of
 * the Y and UV planes, and the return value is presumably a status code --
 * confirm both against the definition.
 */
int scale_run(cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height);
#endif
#endif
|