swscale: fix overflows in RGB rounding constants.
[libav.git] / libswscale / swscale.c
1 /*
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
25
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
28 x -> x
29 YUV9 -> YV12
30 YUV9/YV12 -> Y800
31 Y800 -> YUV9/YV12
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
34 BGR15 -> BGR16
35 */
36
37 /*
38 tested special converters (most are tested actually, but I did not write it down ...)
39 YV12 -> BGR12/BGR16
40 YV12 -> YV12
41 BGR15 -> BGR16
42 BGR16 -> BGR16
43 YVU9 -> YV12
44
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
51 BGR24 -> YV12
52 */
53
54 #include <inttypes.h>
55 #include <string.h>
56 #include <math.h>
57 #include <stdio.h>
58 #include "config.h"
59 #include <assert.h>
60 #include "swscale.h"
61 #include "swscale_internal.h"
62 #include "rgb2rgb.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
69
70 #define DITHER1XBPP
71
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
82
83 /*
84 NOTES
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
86
87 TODO
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
96 */
97
/* 2x2 ordered-dither matrix with values in [0,3]; indexed as
 * [line & 1][x] in yuv2rgb_write() below for the 6-bit green channel
 * of RGB565/BGR565 output. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
};
102
/* 2x2 ordered-dither matrix with values in [0,7]; used in
 * yuv2rgb_write() below for the 5-bit channels of 15/16 bpp RGB
 * output (red/blue of 565, all channels of 555). */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
};
107
/* 4x4 ordered-dither matrix with values in [0,15]; used in
 * yuv2rgb_write() below for 4-bit-per-channel RGB444/BGR444 output.
 * Exported (no static) — also referenced outside this file. */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },
};
114
/* 8x8 ordered-dither matrix with values in [0,31]; used in
 * yuv2rgb_write() below for the red/green channels of 8 bpp RGB
 * output. Exported (no static). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
};
125
/* 8x8 ordered-dither matrix with amplitude ~73; used in
 * yuv2rgb_write() below for the blue channel of 8 bpp RGB output and
 * for 4 bpp output. Exported (no static). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
};
136
/* 8x8 ordered-dither matrix with amplitude ~220, used by the
 * monochrome (1 bpp) output paths below. The #if chain selects one
 * variant at build time; currently the first (linear) table is
 * compiled. The alternatives pre-compensate for a display gamma as
 * noted on each. */
#if 1
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
};
#elif 1
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#elif 1
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#else
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
/* 8x8 ordered-dither matrix with amplitude ~128. Exported (no
 * static). NOTE(review): not referenced in the visible portion of
 * this file — presumably used by other swscale components; confirm
 * before removing. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{  36, 68, 60, 92, 34, 66, 58, 90,},
{ 100,  4,124, 28, 98,  2,122, 26,},
{  52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{  32, 64, 56, 88, 38, 70, 62, 94,},
{  96,  0,120, 24,102,  6,126, 30,},
{  48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104,  8,118, 22,110, 14,},
};
/* Eight packed bytes of 64. Exported constant. NOTE(review): not
 * referenced in the visible portion of this file — presumably a
 * SIMD rounding/bias vector used by the assembler code; confirm. */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{  64, 64, 64, 64, 64, 64, 64, 64 };
197
/* Store one 16-bit sample: shift `val` down by the caller-scope
 * `shift`, clip with the requested signedness, add `bias`, and write
 * in the byte order selected by the caller-scope `big_endian` flag. */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
204
205 static av_always_inline void
206 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
207 int big_endian, int output_bits)
208 {
209 int i;
210 int shift = 19 - output_bits;
211
212 for (i = 0; i < dstW; i++) {
213 int val = src[i] + (1 << (shift - 1));
214 output_pixel(&dest[i], val, 0, uint);
215 }
216 }
217
/* Vertically filter 32-bit intermediate samples into a 16-bit plane;
 * `shift` and `big_endian` are consumed by the output_pixel() macro. */
static av_always_inline void
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
                         const int32_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        /* rounding bias: half of the final shift */
        int val = 1 << (30-output_bits);
        int j;

        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
         * filters (or anything with negative coeffs), the range can be slightly
         * wider in both directions. To account for this overflow, we subtract
         * a constant so it always fits in the signed range (assuming a
         * reasonable filterSize), and re-add that at the end (the 0x8000
         * bias passed to output_pixel below). */
        val -= 0x40000000;
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val, 0x8000, int);
    }
}
242
#undef output_pixel

/* Store one 16-bit word for 9/10-bit formats: shift `val` down by the
 * caller-scope `shift`, clip to the unsigned `output_bits` range, and
 * write in the byte order selected by the caller-scope `big_endian`. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
251
252 static av_always_inline void
253 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
254 int big_endian, int output_bits)
255 {
256 int i;
257 int shift = 15 - output_bits;
258
259 for (i = 0; i < dstW; i++) {
260 int val = src[i] + (1 << (shift - 1));
261 output_pixel(&dest[i], val);
262 }
263 }
264
265 static av_always_inline void
266 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
267 const int16_t **src, uint16_t *dest, int dstW,
268 int big_endian, int output_bits)
269 {
270 int i;
271 int shift = 11 + 16 - output_bits;
272
273 for (i = 0; i < dstW; i++) {
274 int val = 1 << (26-output_bits);
275 int j;
276
277 for (j = 0; j < filterSize; j++)
278 val += src[j][i] * filter[j];
279
280 output_pixel(&dest[i], val);
281 }
282 }
283
#undef output_pixel

/* Instantiate the public per-depth yuv2plane1/yuv2planeX entry points
 * by wrapping the templates above. The wrappers only cast the opaque
 * int16_t buffer pointers to the template's real sample type (the
 * 16-bit path uses 32-bit intermediates). */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
308
/*
 * Vertically filter 15-bit intermediate samples into an 8-bit plane,
 * adding ordered-dither noise before the final shift.
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x, tap;

    for (x = 0; x < dstW; x++) {
        /* dither value pre-scaled into the accumulator domain */
        int acc = dither[(x + offset) & 7] << 12;

        for (tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(acc >> 19);
    }
}
323
/*
 * Write a single unfiltered line of 15-bit intermediate samples as an
 * 8-bit plane, with ordered dithering.
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        /* add dither noise, then drop the 7 fractional bits */
        int v = (src[x] + dither[(x + offset) & 7]) >> 7;

        dest[x] = av_clip_uint8(v);
    }
}
333
334 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
335 const int16_t **chrUSrc, const int16_t **chrVSrc,
336 uint8_t *dest, int chrDstW)
337 {
338 enum PixelFormat dstFormat = c->dstFormat;
339 const uint8_t *chrDither = c->chrDither8;
340 int i;
341
342 if (dstFormat == PIX_FMT_NV12)
343 for (i=0; i<chrDstW; i++) {
344 int u = chrDither[i & 7] << 12;
345 int v = chrDither[(i + 3) & 7] << 12;
346 int j;
347 for (j=0; j<chrFilterSize; j++) {
348 u += chrUSrc[j][i] * chrFilter[j];
349 v += chrVSrc[j][i] * chrFilter[j];
350 }
351
352 dest[2*i]= av_clip_uint8(u>>19);
353 dest[2*i+1]= av_clip_uint8(v>>19);
354 }
355 else
356 for (i=0; i<chrDstW; i++) {
357 int u = chrDither[i & 7] << 12;
358 int v = chrDither[(i + 3) & 7] << 12;
359 int j;
360 for (j=0; j<chrFilterSize; j++) {
361 u += chrUSrc[j][i] * chrFilter[j];
362 v += chrVSrc[j][i] * chrFilter[j];
363 }
364
365 dest[2*i]= av_clip_uint8(v>>19);
366 dest[2*i+1]= av_clip_uint8(u>>19);
367 }
368 }
369
/* Store one 16-bit grayscale sample in the byte order implied by the
 * caller-scope `target` format. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
376
/* Vertically filter 32-bit luma intermediates into 16-bit grayscale,
 * two pixels per iteration; chroma and alpha inputs are ignored. */
static av_always_inline void
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
                        int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        int Y1 = 1 << 14; /* rounding bias: half of the >> 15 below */
        int Y2 = 1 << 14;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2] * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        Y1 >>= 15;
        Y2 >>= 15;
        if ((Y1 | Y2) & 0x10000) {
            /* clip only when a value left the 16-bit range */
            Y1 = av_clip_uint16(Y1);
            Y2 = av_clip_uint16(Y2);
        }
        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
    }
}
406
407 static av_always_inline void
408 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
409 const int32_t *ubuf[2], const int32_t *vbuf[2],
410 const int32_t *abuf[2], uint16_t *dest, int dstW,
411 int yalpha, int uvalpha, int y,
412 enum PixelFormat target)
413 {
414 int yalpha1 = 4095 - yalpha;
415 int i;
416 const int32_t *buf0 = buf[0], *buf1 = buf[1];
417
418 for (i = 0; i < (dstW >> 1); i++) {
419 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
420 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
421
422 output_pixel(&dest[i * 2 + 0], Y1);
423 output_pixel(&dest[i * 2 + 1], Y2);
424 }
425 }
426
427 static av_always_inline void
428 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
429 const int32_t *ubuf[2], const int32_t *vbuf[2],
430 const int32_t *abuf0, uint16_t *dest, int dstW,
431 int uvalpha, int y, enum PixelFormat target)
432 {
433 int i;
434
435 for (i = 0; i < (dstW >> 1); i++) {
436 int Y1 = buf0[i * 2 ] << 1;
437 int Y2 = buf0[i * 2 + 1] << 1;
438
439 output_pixel(&dest[i * 2 + 0], Y1);
440 output_pixel(&dest[i * 2 + 1], Y2);
441 }
442 }
443
444 #undef output_pixel
445
/* Instantiate the three vertical-scaler entry points (X = multi-tap
 * filter, 2 = two-row blend, 1 = single row) for a 16-bit-per-
 * component packed output format. The public signatures take int16_t
 * pointers, but these templates consume 32-bit intermediates, hence
 * the pointer casts. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
}

YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
494
/* Flush one packed byte of monochrome pixels: MONOBLACK stores the
 * accumulated bits as-is, MONOWHITE stores their complement. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
501
/* Vertically filter luma to 1 bpp monochrome with 8x8 ordered
 * dithering. Bits are shifted MSB-first into `acc`, two pixels per
 * iteration, and a full byte is flushed after every 8th pixel.
 * NOTE(review): if dstW is not a multiple of 8, the final partial
 * byte is never written — presumably the callers guarantee alignment
 * for these formats; confirm. */
static av_always_inline void
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
                      const int16_t **chrVSrc, int chrFilterSize,
                      const int16_t **alpSrc, uint8_t *dest, int dstW,
                      int y, enum PixelFormat target)
{
    const uint8_t * const d128=dither_8x8_220[y&7];
    /* combined threshold table — assumes table_gU/table_gV yield the
     * per-luma output bit; see the table setup code (confirm) */
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int i;
    unsigned acc = 0;

    for (i = 0; i < dstW - 1; i += 2) {
        int j;
        int Y1 = 1 << 18; /* rounding bias: half of the >> 19 below */
        int Y2 = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i] * lumFilter[j];
            Y2 += lumSrc[j][i+1] * lumFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        if ((Y1 | Y2) & 0x100) {
            /* clip only when a value left the 8-bit range */
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
        }
        /* shift one dithered bit per pixel into the accumulator */
        acc += acc + g[Y1 + d128[(i + 0) & 7]];
        acc += acc + g[Y2 + d128[(i + 1) & 7]];
        if ((i & 7) == 6) {
            output_pixel(*dest++, acc);
        }
    }
}
537
538 static av_always_inline void
539 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
540 const int16_t *ubuf[2], const int16_t *vbuf[2],
541 const int16_t *abuf[2], uint8_t *dest, int dstW,
542 int yalpha, int uvalpha, int y,
543 enum PixelFormat target)
544 {
545 const int16_t *buf0 = buf[0], *buf1 = buf[1];
546 const uint8_t * const d128 = dither_8x8_220[y & 7];
547 uint8_t *g = c->table_gU[128] + c->table_gV[128];
548 int yalpha1 = 4095 - yalpha;
549 int i;
550
551 for (i = 0; i < dstW - 7; i += 8) {
552 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
553 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
554 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
555 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
556 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
557 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
558 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
559 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
560 output_pixel(*dest++, acc);
561 }
562 }
563
564 static av_always_inline void
565 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
566 const int16_t *ubuf[2], const int16_t *vbuf[2],
567 const int16_t *abuf0, uint8_t *dest, int dstW,
568 int uvalpha, int y, enum PixelFormat target)
569 {
570 const uint8_t * const d128 = dither_8x8_220[y & 7];
571 uint8_t *g = c->table_gU[128] + c->table_gV[128];
572 int i;
573
574 for (i = 0; i < dstW - 7; i += 8) {
575 int acc = g[(buf0[i ] >> 7) + d128[0]];
576 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
577 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
578 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
579 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
580 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
581 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
582 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
583 output_pixel(*dest++, acc);
584 }
585 }
586
587 #undef output_pixel
588
/* Instantiate the three vertical-scaler entry points (X = multi-tap
 * filter, 2 = two-row blend, 1 = single row) for an 8-bit packed
 * output format by binding the target format onto the templates. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **chrUSrc, \
                        const int16_t **chrVSrc, int chrFilterSize, \
                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
                        int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf[2], uint8_t *dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf0, uint8_t *dest, int dstW, \
                        int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}

YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
623
/* Store one packed 4:2:2 macropixel (two luma, one U, one V) into the
 * caller-scope `dest`, using YUYV or UYVY component ordering depending
 * on the caller-scope `target` format. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U; \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V; \
    } else { \
        dest[pos + 0] = U; \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V; \
        dest[pos + 3] = Y2; \
    }
636
/* Vertically filter luma and chroma into packed 8-bit 4:2:2 output
 * (YUYV or UYVY), one macropixel (2 luma + U + V) per iteration. */
static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        int Y1 = 1 << 18; /* rounding bias: half of the >> 19 below */
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2] * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        if ((Y1 | Y2 | U | V) & 0x100) {
            /* clip only when some value left the 8-bit range
             * (bit 8 / sign extension set) */
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }
        output_pixels(4*i, Y1, U, Y2, V);
    }
}
675
676 static av_always_inline void
677 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
678 const int16_t *ubuf[2], const int16_t *vbuf[2],
679 const int16_t *abuf[2], uint8_t *dest, int dstW,
680 int yalpha, int uvalpha, int y,
681 enum PixelFormat target)
682 {
683 const int16_t *buf0 = buf[0], *buf1 = buf[1],
684 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
685 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
686 int yalpha1 = 4095 - yalpha;
687 int uvalpha1 = 4095 - uvalpha;
688 int i;
689
690 for (i = 0; i < (dstW >> 1); i++) {
691 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
692 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
693 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
694 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
695
696 output_pixels(i * 4, Y1, U, Y2, V);
697 }
698 }
699
700 static av_always_inline void
701 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
702 const int16_t *ubuf[2], const int16_t *vbuf[2],
703 const int16_t *abuf0, uint8_t *dest, int dstW,
704 int uvalpha, int y, enum PixelFormat target)
705 {
706 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
707 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
708 int i;
709
710 if (uvalpha < 2048) {
711 for (i = 0; i < (dstW >> 1); i++) {
712 int Y1 = buf0[i * 2] >> 7;
713 int Y2 = buf0[i * 2 + 1] >> 7;
714 int U = ubuf1[i] >> 7;
715 int V = vbuf1[i] >> 7;
716
717 output_pixels(i * 4, Y1, U, Y2, V);
718 }
719 } else {
720 for (i = 0; i < (dstW >> 1); i++) {
721 int Y1 = buf0[i * 2] >> 7;
722 int Y2 = buf0[i * 2 + 1] >> 7;
723 int U = (ubuf0[i] + ubuf1[i]) >> 8;
724 int V = (vbuf0[i] + vbuf1[i]) >> 8;
725
726 output_pixels(i * 4, Y1, U, Y2, V);
727 }
728 }
729 }
730
#undef output_pixels

/* Public packed-4:2:2 output entry points. */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
735
/* R_B/B_R swap the red/blue terms so the same templates serve both
 * RGB48 and BGR48 component orders; output_pixel stores one 16-bit
 * component in the byte order of the (caller-scope) target format. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
744
745 static av_always_inline void
746 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
747 const int32_t **lumSrc, int lumFilterSize,
748 const int16_t *chrFilter, const int32_t **chrUSrc,
749 const int32_t **chrVSrc, int chrFilterSize,
750 const int32_t **alpSrc, uint16_t *dest, int dstW,
751 int y, enum PixelFormat target)
752 {
753 int i;
754
755 for (i = 0; i < (dstW >> 1); i++) {
756 int j;
757 int Y1 = 0;
758 int Y2 = 0;
759 int U = -128 << 23; // 19
760 int V = -128 << 23;
761 int R, G, B;
762
763 for (j = 0; j < lumFilterSize; j++) {
764 Y1 += lumSrc[j][i * 2] * lumFilter[j];
765 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
766 }
767 for (j = 0; j < chrFilterSize; j++) {
768 U += chrUSrc[j][i] * chrFilter[j];
769 V += chrVSrc[j][i] * chrFilter[j];
770 }
771
772 // 8bit: 12+15=27; 16-bit: 12+19=31
773 Y1 >>= 14; // 10
774 Y2 >>= 14;
775 U >>= 14;
776 V >>= 14;
777
778 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
779 Y1 -= c->yuv2rgb_y_offset;
780 Y2 -= c->yuv2rgb_y_offset;
781 Y1 *= c->yuv2rgb_y_coeff;
782 Y2 *= c->yuv2rgb_y_coeff;
783 Y1 += 1 << 13; // 21
784 Y2 += 1 << 13;
785 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
786
787 R = V * c->yuv2rgb_v2r_coeff;
788 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
789 B = U * c->yuv2rgb_u2b_coeff;
790
791 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
792 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
793 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
794 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
795 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
796 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
797 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
798 dest += 6;
799 }
800 }
801
802 static av_always_inline void
803 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
804 const int32_t *ubuf[2], const int32_t *vbuf[2],
805 const int32_t *abuf[2], uint16_t *dest, int dstW,
806 int yalpha, int uvalpha, int y,
807 enum PixelFormat target)
808 {
809 const int32_t *buf0 = buf[0], *buf1 = buf[1],
810 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
811 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
812 int yalpha1 = 4095 - yalpha;
813 int uvalpha1 = 4095 - uvalpha;
814 int i;
815
816 for (i = 0; i < (dstW >> 1); i++) {
817 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
818 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
819 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
820 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
821 int R, G, B;
822
823 Y1 -= c->yuv2rgb_y_offset;
824 Y2 -= c->yuv2rgb_y_offset;
825 Y1 *= c->yuv2rgb_y_coeff;
826 Y2 *= c->yuv2rgb_y_coeff;
827 Y1 += 1 << 13;
828 Y2 += 1 << 13;
829
830 R = V * c->yuv2rgb_v2r_coeff;
831 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
832 B = U * c->yuv2rgb_u2b_coeff;
833
834 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
835 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
836 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
837 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
838 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
839 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
840 dest += 6;
841 }
842 }
843
844 static av_always_inline void
845 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
846 const int32_t *ubuf[2], const int32_t *vbuf[2],
847 const int32_t *abuf0, uint16_t *dest, int dstW,
848 int uvalpha, int y, enum PixelFormat target)
849 {
850 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
851 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
852 int i;
853
854 if (uvalpha < 2048) {
855 for (i = 0; i < (dstW >> 1); i++) {
856 int Y1 = (buf0[i * 2] ) >> 2;
857 int Y2 = (buf0[i * 2 + 1]) >> 2;
858 int U = (ubuf0[i] + (-128 << 11)) >> 2;
859 int V = (vbuf0[i] + (-128 << 11)) >> 2;
860 int R, G, B;
861
862 Y1 -= c->yuv2rgb_y_offset;
863 Y2 -= c->yuv2rgb_y_offset;
864 Y1 *= c->yuv2rgb_y_coeff;
865 Y2 *= c->yuv2rgb_y_coeff;
866 Y1 += 1 << 13;
867 Y2 += 1 << 13;
868
869 R = V * c->yuv2rgb_v2r_coeff;
870 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
871 B = U * c->yuv2rgb_u2b_coeff;
872
873 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
874 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
875 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
876 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
877 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
878 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
879 dest += 6;
880 }
881 } else {
882 for (i = 0; i < (dstW >> 1); i++) {
883 int Y1 = (buf0[i * 2] ) >> 2;
884 int Y2 = (buf0[i * 2 + 1]) >> 2;
885 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
886 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
887 int R, G, B;
888
889 Y1 -= c->yuv2rgb_y_offset;
890 Y2 -= c->yuv2rgb_y_offset;
891 Y1 *= c->yuv2rgb_y_coeff;
892 Y2 *= c->yuv2rgb_y_coeff;
893 Y1 += 1 << 13;
894 Y2 += 1 << 13;
895
896 R = V * c->yuv2rgb_v2r_coeff;
897 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
898 B = U * c->yuv2rgb_u2b_coeff;
899
900 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
901 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
902 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
903 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
904 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
905 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
906 dest += 6;
907 }
908 }
909 }
910
#undef output_pixel
/* NOTE(review): the two #undefs below name lowercase r_b/b_r, while
 * the macros defined above are R_B/B_R; as written they are no-ops
 * and R_B/B_R remain defined past this point — confirm intent. */
#undef r_b
#undef b_r

/* Public 48-bit packed RGB/BGR output entry points. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
919
/*
 * Write two horizontally adjacent pixels (Y1/Y2 share one U/V pair) to an
 * RGB-family destination line. _r/_g/_b point into the per-component lookup
 * tables already selected for the current U/V; i is the pixel-pair index in
 * the line and y the output line number (used to select the dither row).
 * A1/A2 are 8-bit alpha values, only read when hasAlpha is set.
 */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              int U, int V, int A1, int A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum PixelFormat target, int hasAlpha)
{
    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
        /* 32-bit output: the tables hold each component pre-shifted into its
         * byte, so adding the three table entries assembles the pixel. */
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        /* alpha lands in byte 0 for the *32_1 layouts, byte 3 otherwise */
        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
#else
        if (hasAlpha) {
            int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;

            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        } else {
            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
        }
#endif
    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

        /* RGB24 and BGR24 differ only in which table feeds bytes 0 and 2 */
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] = g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] = g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
        /* 16-bit output with ordered dither: the dither amount is added to
         * the luma index before the table lookup. */
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
            dr1 = dither_2x2_8[ y & 1 ][0];
            dg1 = dither_2x2_4[ y & 1 ][0];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1 ][1];
            dg2 = dither_2x2_4[ y & 1 ][1];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
            dr1 = dither_2x2_8[ y & 1 ][0];
            dg1 = dither_2x2_8[ y & 1 ][1];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1 ][1];
            dg2 = dither_2x2_8[ y & 1 ][0];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            dr1 = dither_4x4_16[ y & 3 ][0];
            dg1 = dither_4x4_16[ y & 3 ][1];
            db1 = dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = dither_4x4_16[ y & 3 ][1];
            dg2 = dither_4x4_16[ y & 3 ][0];
            db2 = dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4-bit */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
            const uint8_t * const d64 = dither_8x8_73[y & 7];
            const uint8_t * const d32 = dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 = d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 = d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64 = dither_8x8_73 [y & 7];
            const uint8_t * const d128 = dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 = d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 = d64[(i * 2 + 1) & 7];
        }

        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
            /* two 4-bit pixels packed into one byte, second in the high nibble */
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                      ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
        } else {
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
        }
    }
}
1031
/*
 * Vertical-scaling RGB output, many-tap case: each output pixel pair is the
 * weighted sum (lumFilter/chrFilter coefficients) over several input lines,
 * rounded, shifted down to 8 bits, clipped, then written via yuv2rgb_write().
 */
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target, int hasAlpha)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        int Y1 = 1 << 18; /* rounding bias for the >> 19 below */
        int Y2 = 1 << 18;
        int U = 1 << 18;
        int V = 1 << 18;
        int av_unused A1, A2;
        const void *r, *g, *b;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2] * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U >>= 19;
        V >>= 19;
        /* only clip when some value actually left the 8-bit range */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U = av_clip_uint8(U);
            V = av_clip_uint8(V);
        }
        if (hasAlpha) {
            A1 = 1 << 18;
            A2 = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A1 += alpSrc[j][i * 2 ] * lumFilter[j];
                A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
            }
            A1 >>= 19;
            A2 >>= 19;
            if ((A1 | A2) & 0x100) {
                A1 = av_clip_uint8(A1);
                A2 = av_clip_uint8(A2);
            }
        }

        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
        r = c->table_rV[V];
        g = (c->table_gU[U] + c->table_gV[V]);
        b = c->table_bU[U];

        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
    }
}
1093
/*
 * Vertical-scaling RGB output, two-line case: blend two buffered lines with
 * 12-bit weights (w1 = yalpha/uvalpha, w0 = 4095 - w1), then shift the
 * 7.12 fixed-point product down by 19 to get 8-bit values.
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
{
    const int16_t *buf0 = buf[0], *buf1 = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1 = 4095 - yalpha;
    int uvalpha1 = 4095 - uvalpha;
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
        int A1, A2;
        const void *r = c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b = c->table_bU[U];

        if (hasAlpha) {
            A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
        }

        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
    }
}
1129
1130 static av_always_inline void
1131 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1132 const int16_t *ubuf[2], const int16_t *vbuf[2],
1133 const int16_t *abuf0, uint8_t *dest, int dstW,
1134 int uvalpha, int y, enum PixelFormat target,
1135 int hasAlpha)
1136 {
1137 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1139 int i;
1140
1141 if (uvalpha < 2048) {
1142 for (i = 0; i < (dstW >> 1); i++) {
1143 int Y1 = buf0[i * 2] >> 7;
1144 int Y2 = buf0[i * 2 + 1] >> 7;
1145 int U = ubuf1[i] >> 7;
1146 int V = vbuf1[i] >> 7;
1147 int A1, A2;
1148 const void *r = c->table_rV[V],
1149 *g = (c->table_gU[U] + c->table_gV[V]),
1150 *b = c->table_bU[U];
1151
1152 if (hasAlpha) {
1153 A1 = abuf0[i * 2 ] >> 7;
1154 A2 = abuf0[i * 2 + 1] >> 7;
1155 }
1156
1157 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1158 r, g, b, y, target, hasAlpha);
1159 }
1160 } else {
1161 for (i = 0; i < (dstW >> 1); i++) {
1162 int Y1 = buf0[i * 2] >> 7;
1163 int Y2 = buf0[i * 2 + 1] >> 7;
1164 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1165 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1166 int A1, A2;
1167 const void *r = c->table_rV[V],
1168 *g = (c->table_gU[U] + c->table_gV[V]),
1169 *b = c->table_bU[U];
1170
1171 if (hasAlpha) {
1172 A1 = abuf0[i * 2 ] >> 7;
1173 A2 = abuf0[i * 2 + 1] >> 7;
1174 }
1175
1176 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1177 r, g, b, y, target, hasAlpha);
1178 }
1179 }
1180 }
1181
/*
 * Instantiate the vertical-scaling entry points for one RGB output format
 * from the templates above: _X_c (many-tap), _2_c (two-line blend) and
 * _1_c (single line). YUV2RGBWRAPPERX only emits the _X_c variant (used by
 * the "full" writers below); YUV2RGBWRAPPER emits all three.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
}
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
} \
\
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
}

/* With CONFIG_SMALL a single alpha-aware instance per 32-bit format decides
 * at run time (c->alpPixBuf); otherwise separate alpha (a32*) and no-alpha
 * (x32*) specializations are compiled. */
#if CONFIG_SMALL
YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
#endif
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
#endif
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
1233
1234 static av_always_inline void
1235 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1236 const int16_t **lumSrc, int lumFilterSize,
1237 const int16_t *chrFilter, const int16_t **chrUSrc,
1238 const int16_t **chrVSrc, int chrFilterSize,
1239 const int16_t **alpSrc, uint8_t *dest,
1240 int dstW, int y, enum PixelFormat target, int hasAlpha)
1241 {
1242 int i;
1243 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1244
1245 for (i = 0; i < dstW; i++) {
1246 int j;
1247 int Y = 0;
1248 int U = -128 << 19;
1249 int V = -128 << 19;
1250 int av_unused A;
1251 int R, G, B;
1252
1253 for (j = 0; j < lumFilterSize; j++) {
1254 Y += lumSrc[j][i] * lumFilter[j];
1255 }
1256 for (j = 0; j < chrFilterSize; j++) {
1257 U += chrUSrc[j][i] * chrFilter[j];
1258 V += chrVSrc[j][i] * chrFilter[j];
1259 }
1260 Y >>= 10;
1261 U >>= 10;
1262 V >>= 10;
1263 if (hasAlpha) {
1264 A = 1 << 21;
1265 for (j = 0; j < lumFilterSize; j++) {
1266 A += alpSrc[j][i] * lumFilter[j];
1267 }
1268 A >>= 19;
1269 if (A & 0x100)
1270 A = av_clip_uint8(A);
1271 }
1272 Y -= c->yuv2rgb_y_offset;
1273 Y *= c->yuv2rgb_y_coeff;
1274 Y += 1 << 21;
1275 R = Y + V*c->yuv2rgb_v2r_coeff;
1276 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1277 B = Y + U*c->yuv2rgb_u2b_coeff;
1278 if ((R | G | B) & 0xC0000000) {
1279 R = av_clip_uintp2(R, 30);
1280 G = av_clip_uintp2(G, 30);
1281 B = av_clip_uintp2(B, 30);
1282 }
1283
1284 switch(target) {
1285 case PIX_FMT_ARGB:
1286 dest[0] = hasAlpha ? A : 255;
1287 dest[1] = R >> 22;
1288 dest[2] = G >> 22;
1289 dest[3] = B >> 22;
1290 break;
1291 case PIX_FMT_RGB24:
1292 dest[0] = R >> 22;
1293 dest[1] = G >> 22;
1294 dest[2] = B >> 22;
1295 break;
1296 case PIX_FMT_RGBA:
1297 dest[0] = R >> 22;
1298 dest[1] = G >> 22;
1299 dest[2] = B >> 22;
1300 dest[3] = hasAlpha ? A : 255;
1301 break;
1302 case PIX_FMT_ABGR:
1303 dest[0] = hasAlpha ? A : 255;
1304 dest[1] = B >> 22;
1305 dest[2] = G >> 22;
1306 dest[3] = R >> 22;
1307 dest += 4;
1308 break;
1309 case PIX_FMT_BGR24:
1310 dest[0] = B >> 22;
1311 dest[1] = G >> 22;
1312 dest[2] = R >> 22;
1313 break;
1314 case PIX_FMT_BGRA:
1315 dest[0] = B >> 22;
1316 dest[1] = G >> 22;
1317 dest[2] = R >> 22;
1318 dest[3] = hasAlpha ? A : 255;
1319 break;
1320 }
1321 dest += step;
1322 }
1323 }
1324
/* Instantiate the full-chroma-resolution writers (many-tap variant only).
 * Same alpha-specialization scheme as the table-based wrappers above. */
#if CONFIG_SMALL
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
#endif
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
#endif
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
1344
1345 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1346 int width, int height,
1347 int y, uint8_t val)
1348 {
1349 int i;
1350 uint8_t *ptr = plane + stride*y;
1351 for (i=0; i<height; i++) {
1352 memset(ptr, val, width);
1353 ptr += stride;
1354 }
1355 }
1356
/* Endian-aware 16-bit component load for the rgb48 converters below. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* For BGR48 input the first/last components swap roles, so map the generic
 * r_b/b_r locals of the templates onto r and b based on the origin format. */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1361
/*
 * Convert one line of packed 48-bit RGB/BGR (16 bits per component) to
 * 16-bit luma using the fixed-point RY/GY/BY coefficients; the r/b macros
 * above handle the component order, input_pixel() the endianness.
 */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
{
    int i;
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);

        /* weighted sum with combined rounding/offset constant */
        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
    }
}
1375
/*
 * Convert one line of packed 48-bit RGB/BGR to 16-bit chroma at full
 * horizontal resolution. src2 must alias src1 (packed input has a single
 * source buffer).
 */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
{
    int i;
    assert(src1==src2);
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);

        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
    }
}
1392
/*
 * Like rgb48ToUV_c_template, but horizontally averages each pair of input
 * pixels first (rounded (a + b + 1) >> 1), producing chroma at half
 * horizontal resolution.
 */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
{
    int i;
    assert(src1==src2);
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;

        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
    }
}
1409
1410 #undef r
1411 #undef b
1412 #undef input_pixel
1413
/*
 * Instantiate the public ToY / ToUV / ToUV_half entry points for one 48-bit
 * format: they only cast the byte pointers to uint16_t and forward to the
 * templates above with the format as a compile-time constant.
 */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
                                            int width, uint32_t *unused) \
{ \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
} \
\
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
{ \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
} \
\
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
{ \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
}

rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
1447
/* Load one source pixel for the 16/32-bit packed converters: a native
 * aligned 32-bit read for the 4-byte-per-pixel formats, otherwise an
 * endian-aware 16-bit read. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1451
/*
 * Convert a line of packed 16/24/32-bit RGB to 8-bit luma. The layout is
 * described by compile-time constants when instantiated via
 * rgb16_32_wrapper, so this specializes to straight-line code:
 *   shp         pre-shift applied to the whole pixel
 *   maskr/g/b   per-component masks, shr/shg/shb the shifts after masking
 *   rsh/gsh/bsh extra left shift applied to the RY/GY/BY coefficients
 *   S           total fractional bits of the fixed-point sum
 */
static av_always_inline void
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
{
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
    /* unsigned so the whole sum is evaluated in unsigned arithmetic,
     * avoiding signed-overflow UB for large S */
    const unsigned rnd = 33u << (S - 1);
    int i;

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
    }
}
1472
/*
 * Convert a line of packed 16/24/32-bit RGB to 8-bit chroma at full
 * horizontal resolution. Parameters as in rgb16_32ToY_c_template.
 */
static av_always_inline void
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
{
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
    /* unsigned rounding constant: keeps the sum in unsigned arithmetic */
    const unsigned rnd = 257u << (S - 1);
    int i;

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
    }
}
1496
/*
 * Convert a line of packed RGB to 8-bit chroma at half horizontal
 * resolution, summing each pair of pixels before the weighted sum (note the
 * extra >> (S + 1) and the masks widened below to hold two-pixel sums).
 */
static av_always_inline void
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
{
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              maskgx = ~(maskr | maskb);
    const unsigned rnd = 257u << S;
    int i;

    /* widen the masks so they can hold the sum of two components */
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        /* sum green via the complement mask, then recover r+b by subtraction */
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;

        b = (rb & maskb) >> shb;
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            /* 565 green touches the widened mask's extra bit: shift only */
            g >>= shg;
        } else {
            g = (g & maskg) >> shg;
        }
        r = (rb & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
    }
}
1531
1532 #undef input_pixel
1533
/*
 * Instantiate the ToY / ToUV / ToUV_half entry points for one packed RGB
 * layout; all shift/mask parameters become compile-time constants in the
 * templates above.
 */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
{ \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
\
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
\
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
}

rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1571
/* Extract the alpha channel (byte 0 of each 4-byte ABGR/ARGB pixel). */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++)
        dst[x] = src[4 * x];
}
1579
/* Extract the alpha channel (byte 3 of each 4-byte RGBA/BGRA pixel). */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++)
        dst[x] = src[4 * x + 3];
}
1587
/* PAL8 to luma: look up each palette index and keep the low byte (Y) of
 * the 32-bit palette entry. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int x;

    for (x = 0; x < width; x++) {
        int idx = src[x];

        dst[x] = pal[idx] & 0xFF;
    }
}
1597
/* PAL8 to chroma: palette entries are packed as (V << 16) | (U << 8) | Y.
 * Both chroma sources alias the same buffer for palettized input. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int x;

    assert(src1 == src2);
    for (x = 0; x < width; x++) {
        uint32_t entry = pal[src1[x]];

        dstU[x] = entry >> 8;
        dstV[x] = entry >> 16;
    }
}
1611
/* Expand 1 bit per pixel, white = 0 (hence the inversion), to 8-bit luma:
 * each bit of a source byte becomes 0 or 255, MSB first. Only full groups
 * of 8 pixels are processed. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int x, bit;

    for (x = 0; x < width / 8; x++) {
        int byte = ~src[x];

        for (bit = 0; bit < 8; bit++)
            dst[8 * x + bit] = ((byte >> (7 - bit)) & 1) * 255;
    }
}
1622
/* Expand 1 bit per pixel, black = 0, to 8-bit luma: each bit of a source
 * byte becomes 0 or 255, MSB first. Only full groups of 8 pixels are
 * processed. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int x, bit;

    for (x = 0; x < width / 8; x++) {
        int byte = src[x];

        for (bit = 0; bit < 8; bit++)
            dst[8 * x + bit] = ((byte >> (7 - bit)) & 1) * 255;
    }
}
1633
1634 //FIXME yuy2* can read up to 7 samples too much
1635
/* YUYV input: luma occupies the even bytes. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++)
        dst[x] = src[2 * x];
}
1643
/* YUYV input: U at byte 1 and V at byte 3 of each 4-byte (2-pixel) group.
 * Packed input: both chroma sources alias the same buffer. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        dstU[x] = src1[4 * x + 1];
        dstV[x] = src1[4 * x + 3];
    }
    assert(src1 == src2);
}
1654
/* Byte-swap a row of 16-bit luma samples (endianness conversion). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *in = (const uint16_t *) _src;
    uint16_t *out = (uint16_t *) _dst;
    int x;

    for (x = 0; x < width; x++)
        out[x] = av_bswap16(in[x]);
}
1664
/* Byte-swap one row each of 16-bit U and V samples (endianness conversion). */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *inU = (const uint16_t *) _src1;
    const uint16_t *inV = (const uint16_t *) _src2;
    uint16_t *outU = (uint16_t *) _dstU;
    uint16_t *outV = (uint16_t *) _dstV;
    int x;

    for (x = 0; x < width; x++) {
        outU[x] = av_bswap16(inU[x]);
        outV[x] = av_bswap16(inV[x]);
    }
}
1677
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY input: luma occupies the odd bytes. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++)
        dst[x] = src[2 * x + 1];
}
1687
/* UYVY input: U at byte 0 and V at byte 2 of each 4-byte (2-pixel) group.
 * Packed input: both chroma sources alias the same buffer. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        dstU[x] = src1[4 * x + 0];
        dstV[x] = src1[4 * x + 2];
    }
    assert(src1 == src2);
}
1698
1699 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1700 const uint8_t *src, int width)
1701 {
1702 int i;
1703 for (i = 0; i < width; i++) {
1704 dst1[i] = src[2*i+0];
1705 dst2[i] = src[2*i+1];
1706 }
1707 }
1708
/* NV12: interleaved chroma plane is U,V,U,V,... */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
1715
/* NV21: interleaved chroma plane is V,U,V,U,... (swapped destinations). */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1722
/* Endian-aware 16-bit load; not used by the 8-bit converters immediately
 * below — presumably consumed by 16-bit input converters later in the file
 * (TODO confirm against the full source). */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1724
1725 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1726 int width, uint32_t *unused)
1727 {
1728 int i;
1729 for (i=0; i<width; i++) {
1730 int b= src[i*3+0];
1731 int g= src[i*3+1];
1732 int r= src[i*3+2];
1733
1734 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1735 }
1736 }
1737
1738 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1739 const uint8_t *src2, int width, uint32_t *unused)
1740 {
1741 int i;
1742 for (i=0; i<width; i++) {
1743 int b= src1[3*i + 0];
1744 int g= src1[3*i + 1];
1745 int r= src1[3*i + 2];
1746
1747 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1748 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1749 }
1750 assert(src1 == src2);
1751 }
1752
1753 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1754 const uint8_t *src2, int width, uint32_t *unused)
1755 {
1756 int i;
1757 for (i=0; i<width; i++) {
1758 int b= src1[6*i + 0] + src1[6*i + 3];
1759 int g= src1[6*i + 1] + src1[6*i + 4];
1760 int r= src1[6*i + 2] + src1[6*i + 5];
1761
1762 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1763 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1764 }
1765 assert(src1 == src2);
1766 }
1767
1768 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1769 uint32_t *unused)
1770 {
1771 int i;
1772 for (i=0; i<width; i++) {
1773 int r= src[i*3+0];
1774 int g= src[i*3+1];
1775 int b= src[i*3+2];
1776
1777 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1778 }
1779 }
1780
1781 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1782 const uint8_t *src2, int width, uint32_t *unused)
1783 {
1784 int i;
1785 assert(src1==src2);
1786 for (i=0; i<width; i++) {
1787 int r= src1[3*i + 0];
1788 int g= src1[3*i + 1];
1789 int b= src1[3*i + 2];
1790
1791 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1792 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1793 }
1794 }
1795
1796 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1797 const uint8_t *src2, int width, uint32_t *unused)
1798 {
1799 int i;
1800 assert(src1==src2);
1801 for (i=0; i<width; i++) {
1802 int r= src1[6*i + 0] + src1[6*i + 3];
1803 int g= src1[6*i + 1] + src1[6*i + 4];
1804 int b= src1[6*i + 2] + src1[6*i + 5];
1805
1806 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1807 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1808 }
1809 }
1810
1811 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1812 {
1813 int i;
1814 for (i = 0; i < width; i++) {
1815 int g = src[0][i];
1816 int b = src[1][i];
1817 int r = src[2][i];
1818
1819 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1820 }
1821 }
1822
/* Planar GBR with 16-bit little-endian components to 16-bit luma. */
static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i = 0; i < width; i++) {
        int g = AV_RL16(src[0] + i);
        int b = AV_RL16(src[1] + i);
        int r = AV_RL16(src[2] + i);

        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
    }
}
1836
/* Planar GBR with 16-bit big-endian components to 16-bit luma. */
static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i = 0; i < width; i++) {
        int g = AV_RB16(src[0] + i);
        int b = AV_RB16(src[1] + i);
        int r = AV_RB16(src[2] + i);

        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
    }
}
1850
1851 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1852 {
1853 int i;
1854 for (i = 0; i < width; i++) {
1855 int g = src[0][i];
1856 int b = src[1][i];
1857 int r = src[2][i];
1858
1859 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1860 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1861 }
1862 }
1863
/* Planar GBR with 16-bit little-endian components to 16-bit chroma. */
static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    for (i = 0; i < width; i++) {
        int g = AV_RL16(src[0] + i);
        int b = AV_RL16(src[1] + i);
        int r = AV_RL16(src[2] + i);

        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
    }
}
1879
/* Planar GBR with 16-bit big-endian components to 16-bit chroma. */
static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    for (i = 0; i < width; i++) {
        int g = AV_RB16(src[0] + i);
        int b = AV_RB16(src[1] + i);
        int r = AV_RB16(src[2] + i);

        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
    }
}
1895
/*
 * Horizontal FIR scaling of >8-bit input to the 19-bit intermediate format
 * (output samples are 32 bits wide). 'sh' adapts the down-shift to the
 * source bit depth so the result always occupies 19 bits.
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh = bits - 4;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}
1918
/*
 * Horizontal FIR scaling of >8-bit input to the 15-bit intermediate format.
 * The shift comes from the source bit depth descriptor.
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}
1939
1940 // bilinear / bicubic scaling
1941 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1942 const int16_t *filter, const int16_t *filterPos,
1943 int filterSize)
1944 {
1945 int i;
1946 for (i=0; i<dstW; i++) {
1947 int j;
1948 int srcPos= filterPos[i];
1949 int val=0;
1950 for (j=0; j<filterSize; j++) {
1951 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1952 }
1953 //filter += hFilterSize;
1954 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1955 //dst[i] = val>>7;
1956 }
1957 }
1958
1959 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1960 const int16_t *filter, const int16_t *filterPos,
1961 int filterSize)
1962 {
1963 int i;
1964 int32_t *dst = (int32_t *) _dst;
1965 for (i=0; i<dstW; i++) {
1966 int j;
1967 int srcPos= filterPos[i];
1968 int val=0;
1969 for (j=0; j<filterSize; j++) {
1970 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1971 }
1972 //filter += hFilterSize;
1973 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1974 //dst[i] = val>>7;
1975 }
1976 }
1977
1978 //FIXME all pal and rgb srcFormats could do this convertion as well
1979 //FIXME all scalers more complex than bilinear could do half of this transform
1980 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1981 {
1982 int i;
1983 for (i = 0; i < width; i++) {
1984 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1985 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1986 }
1987 }
/* Compress full-range (JPEG) chroma to limited-range (MPEG), in place.
 * A zero input lands at about 1469 (in intermediate-scale terms). */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *uEnd = dstU + width;
    while (dstU < uEnd) {
        *dstU = (*dstU * 1799 + 4081085) >> 11; //1469
        *dstV = (*dstV * 1799 + 4081085) >> 11; //1469
        dstU++;
        dstV++;
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG), in place.
 * 30189 is the largest input mapping below the top of the output range. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int y = dst[i] > 30189 ? 30189 : dst[i];
        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress full-range (JPEG) luma to limited-range (MPEG), in place. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *end = dst + width;
    for (; dst < end; dst++)
        *dst = (*dst * 14071 + 33561947) >> 14;
}
2008
/* Expand limited-range (MPEG) chroma to full-range (JPEG), in place, for
 * the 19-bit intermediate pipeline.  The planes really hold int32_t
 * samples; the int16_t pointers are just the generic per-plane interface.
 *
 * Inputs above 30775 << 4 would overshoot the top of the output range and
 * are clamped first (a zero input lands at about -264, in 8-bit terms).
 *
 * Overflow fix: the product (30775 << 4) * 4663 does not fit in 32 signed
 * bits, so the multiply/subtract is done in unsigned (wrapping) arithmetic
 * and the wrapped result is converted back to int.  This is exact because
 * the final value always fits in an int32_t, and it avoids the
 * signed-integer-overflow undefined behavior of the previous code. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        /* clamp, equivalent to FFMIN(x, 30775 << 4) */
        int u = dstU[i] > (30775 << 4) ? (30775 << 4) : dstU[i];
        int v = dstV[i] > (30775 << 4) ? (30775 << 4) : dstV[i];

        dstU[i] = ((int) (u * 4663U - (9289992 << 4))) >> 12; //-264
        dstV[i] = ((int) (v * 4663U - (9289992 << 4))) >> 12; //-264
    }
}
/* Compress full-range (JPEG) chroma to limited-range (MPEG), in place,
 * for the 19-bit intermediate pipeline (the planes hold int32_t samples
 * behind the generic int16_t plane interface). */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int i;
    for (i = 0; i < width; i++) {
        u[i] = (u[i] * 1799 + (4081085 << 4)) >> 11; //1469
        v[i] = (v[i] * 1799 + (4081085 << 4)) >> 11; //1469
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG), in place, for
 * the 19-bit intermediate pipeline (planes hold int32_t samples).
 *
 * Inputs above 30189 << 4 would overshoot the top of the output range and
 * are clamped first.
 *
 * Overflow fix: the product (30189 << 4) * 4769 does not fit in 32 signed
 * bits, so the multiply/subtract is done in unsigned (wrapping) arithmetic
 * and converted back to int; exact because the final value always fits in
 * an int32_t.  This avoids the signed-overflow undefined behavior of the
 * previous code. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        /* clamp, equivalent to FFMIN(x, 30189 << 4) */
        int y = dst[i] > (30189 << 4) ? (30189 << 4) : dst[i];
        dst[i] = ((int) (y * 4769U - (39057361 << 2))) >> 12;
    }
}
/* Compress full-range (JPEG) luma to limited-range (MPEG), in place, for
 * the 19-bit intermediate pipeline (planes hold int32_t samples).
 *
 * Overflow fix: with 19-bit inputs, dst[i] * 14071 can exceed INT32_MAX
 * (signed-integer overflow, undefined behavior).  Scale the multiplier
 * and the rounding constant down by 4 and shift by 12 instead of 14: the
 * intermediate products then stay below 2^31 over the whole 19-bit input
 * range.  The multiplier becomes 14071/4 = 3517, a precision loss of
 * under 0.03% relative to the exact coefficient. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
2043
2044 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2045 const uint8_t *src, int srcW, int xInc)
2046 {
2047 int i;
2048 unsigned int xpos=0;
2049 for (i=0;i<dstWidth;i++) {
2050 register unsigned int xx=xpos>>16;
2051 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2052 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2053 xpos+=xInc;
2054 }
2055 }
2056
2057 // *** horizontal scale Y line to temp buffer
2058 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2059 const uint8_t *src_in[4], int srcW, int xInc,
2060 const int16_t *hLumFilter,
2061 const int16_t *hLumFilterPos, int hLumFilterSize,
2062 uint8_t *formatConvBuffer,
2063 uint32_t *pal, int isAlpha)
2064 {
2065 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2066 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2067 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2068
2069 if (toYV12) {
2070 toYV12(formatConvBuffer, src, srcW, pal);
2071 src= formatConvBuffer;
2072 } else if (c->readLumPlanar && !isAlpha) {
2073 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2074 src = formatConvBuffer;
2075 }
2076
2077 if (!c->hyscale_fast) {
2078 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2079 } else { // fast bilinear upscale / crap downscale
2080 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2081 }
2082
2083 if (convertRange)
2084 convertRange(dst, dstWidth);
2085 }
2086
2087 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2088 int dstWidth, const uint8_t *src1,
2089 const uint8_t *src2, int srcW, int xInc)
2090 {
2091 int i;
2092 unsigned int xpos=0;
2093 for (i=0;i<dstWidth;i++) {
2094 register unsigned int xx=xpos>>16;
2095 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2096 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2097 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2098 xpos+=xInc;
2099 }
2100 }
2101
2102 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2103 const uint8_t *src_in[4],
2104 int srcW, int xInc, const int16_t *hChrFilter,
2105 const int16_t *hChrFilterPos, int hChrFilterSize,
2106 uint8_t *formatConvBuffer, uint32_t *pal)
2107 {
2108 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2109 if (c->chrToYV12) {
2110 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2111 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2112 src1= formatConvBuffer;
2113 src2= buf2;
2114 } else if (c->readChrPlanar) {
2115 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2116 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2117 src1= formatConvBuffer;
2118 src2= buf2;
2119 }
2120
2121 if (!c->hcscale_fast) {
2122 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2123 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2124 } else { // fast bilinear upscale / crap downscale
2125 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2126 }
2127
2128 if (c->chrConvertRange)
2129 c->chrConvertRange(dst1, dst2, dstWidth);
2130 }
2131
/**
 * Pick the C ("plain") output functions matching c->dstFormat.
 *
 * Planar writers (yuv2plane1/yuv2planeX, plus yuv2nv12cX for NV12/NV21)
 * are chosen by destination bit depth and endianness; packed writers
 * (yuv2packed1/yuv2packed2/yuv2packedX) by the packed pixel format, with
 * separate variants for full horizontal chroma interpolation
 * (SWS_FULL_CHR_H_INT) and, where available, for alpha output.
 * Only the pointers relevant to the format are written; the others are
 * left untouched, so callers should pre-initialize them.
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
{
    enum PixelFormat dstFormat = c->dstFormat;

    /* planar writers: selected by output bit depth and endianness */
    if (is16BPS(dstFormat)) {
        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
        } else {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
        }
    } else {
        *yuv2plane1 = yuv2plane1_8_c;
        *yuv2planeX = yuv2planeX_8_c;
        if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
            *yuv2nv12cX = yuv2nv12cX_c;
    }

    /* packed writers: full horizontal chroma interpolation requested */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
        case PIX_FMT_RGBA:
#if CONFIG_SMALL
            *yuv2packedX = yuv2rgba32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packedX = yuv2rgba32_full_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2rgbx32_full_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_ARGB:
#if CONFIG_SMALL
            *yuv2packedX = yuv2argb32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packedX = yuv2argb32_full_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2xrgb32_full_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_BGRA:
#if CONFIG_SMALL
            *yuv2packedX = yuv2bgra32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packedX = yuv2bgra32_full_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2bgrx32_full_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_ABGR:
#if CONFIG_SMALL
            *yuv2packedX = yuv2abgr32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packedX = yuv2abgr32_full_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2xbgr32_full_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_RGB24:
            *yuv2packedX = yuv2rgb24_full_X_c;
            break;
        case PIX_FMT_BGR24:
            *yuv2packedX = yuv2bgr24_full_X_c;
            break;
        }
    } else {
        /* packed writers with horizontally subsampled chroma */
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
            break;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
            break;
        case PIX_FMT_MONOWHITE:
            *yuv2packed1 = yuv2monowhite_1_c;
            *yuv2packed2 = yuv2monowhite_2_c;
            *yuv2packedX = yuv2monowhite_X_c;
            break;
        case PIX_FMT_MONOBLACK:
            *yuv2packed1 = yuv2monoblack_1_c;
            *yuv2packed2 = yuv2monoblack_2_c;
            *yuv2packedX = yuv2monoblack_X_c;
            break;
        case PIX_FMT_YUYV422:
            *yuv2packed1 = yuv2yuyv422_1_c;
            *yuv2packed2 = yuv2yuyv422_2_c;
            *yuv2packedX = yuv2yuyv422_X_c;
            break;
        case PIX_FMT_UYVY422:
            *yuv2packed1 = yuv2uyvy422_1_c;
            *yuv2packed2 = yuv2uyvy422_2_c;
            *yuv2packedX = yuv2uyvy422_X_c;
            break;
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
            break;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
            break;
        case PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
            break;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            break;
        case PIX_FMT_RGB32:
        case PIX_FMT_BGR32:
#if CONFIG_SMALL
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packed1 = yuv2rgba32_1_c;
                *yuv2packed2 = yuv2rgba32_2_c;
                *yuv2packedX = yuv2rgba32_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx32_1_c;
                *yuv2packed2 = yuv2rgbx32_2_c;
                *yuv2packedX = yuv2rgbx32_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_RGB32_1:
        case PIX_FMT_BGR32_1:
#if CONFIG_SMALL
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packed1 = yuv2rgba32_1_1_c;
                *yuv2packed2 = yuv2rgba32_1_2_c;
                *yuv2packedX = yuv2rgba32_1_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx32_1_1_c;
                *yuv2packed2 = yuv2rgbx32_1_2_c;
                *yuv2packedX = yuv2rgbx32_1_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_RGB24:
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            break;
        case PIX_FMT_BGR24:
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
            break;
        case PIX_FMT_RGB565LE:
        case PIX_FMT_RGB565BE:
        case PIX_FMT_BGR565LE:
        case PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
            break;
        case PIX_FMT_RGB555LE:
        case PIX_FMT_RGB555BE:
        case PIX_FMT_BGR555LE:
        case PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
            break;
        case PIX_FMT_RGB444LE:
        case PIX_FMT_RGB444BE:
        case PIX_FMT_BGR444LE:
        case PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            break;
        case PIX_FMT_RGB8:
        case PIX_FMT_BGR8:
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            break;
        case PIX_FMT_RGB4:
        case PIX_FMT_BGR4:
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
            break;
        case PIX_FMT_RGB4_BYTE:
        case PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
            break;
        }
    }
}
2373
2374 #define DEBUG_SWSCALE_BUFFERS 0
2375 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2376
/**
 * Scale and convert one slice of the input picture to the destination.
 *
 * Horizontally scaled input lines are cached in the lumPixBuf / chrUPixBuf /
 * chrVPixBuf / alpPixBuf ring buffers; for every destination line the
 * vertical filter is applied to the cached lines and the result is written
 * out through the yuv2plane1/yuv2planeX/yuv2nv12cX planar writers or the
 * yuv2packed1/yuv2packed2/yuv2packedX packed writers.
 *
 * NOTE(review): for packed input, src[]/srcStride[] entries 1-3 are
 * overwritten with entry 0 below, so the caller's arrays are modified.
 *
 * @param srcSliceY first line of this slice within the source picture
 * @param srcSliceH number of source lines in the slice
 * @return the number of destination lines output by this call
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
{
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    int lastDstY;
    uint32_t *pal=c->pal_yuv;
    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
    int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);

    /* vars which will change and which we need to store back in the context */
    int dstY= c->dstY;
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
        src[0]=
        src[1]=
        src[2]=
        src[3]= src[0];
        srcStride[0]=
        srcStride[1]=
        srcStride[2]=
        srcStride[3]= srcStride[0];
    }
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
            warnedAlready=1;
        }
    }

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {
        lumBufIndex=-1;
        chrBufIndex=-1;
        dstY=0;
        lastInLumBuf= -1;
        lastInChrBuf= -1;
    }

    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
    }
    lastDstY= dstY;

    /* main loop: produce one destination line per iteration */
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        uint8_t *dest[4] = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        };

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
        int enough_lines;

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        }

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
            };
            lumBufIndex++;
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
                    formatConvBuffer,
                    pal, 0);
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
                        formatConvBuffer,
                        pal, 1);
            lastInLumBuf++;
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        }
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
            };
            chrBufIndex++;
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            lastInChrBuf++;
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        }
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
        if (!enough_lines)
            break; //we can't output a dstY line so let's try with the next slice

#if HAVE_MMX
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
#endif
        if (should_dither) {
            c->chrDither8 = dither_8x8_128[chrDstY & 7];
            c->lumDither8 = dither_8x8_128[dstY & 7];
        }
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                           &yuv2packed1, &yuv2packed2, &yuv2packedX);
        }

        /* vertical scaling + output of destination line dstY */
        {
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;

                if (vLumFilterSize == 1) {
                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                } else {
                    yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
                               lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
                }

                if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                    if (yuv2nv12cX) {
                        yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                    } else if (vChrFilterSize == 1) {
                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                    } else {
                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
                                   chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
                    }
                }

                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
                    if (vLumFilterSize == 1) {
                        yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                    } else {
                        yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
                                   alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
                    }
                }
            } else {
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    lumMmxFilter[2] =
                    lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
                    chrMmxFilter[2] =
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);
                }
            }
        }
    }

    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

#if HAVE_MMX2
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");
#endif
    emms_c();

    /* store changed local vars back in the context */
    c->dstY= dstY;
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
}
2667
2668 static av_cold void sws_init_swScale_c(SwsContext *c)
2669 {
2670 enum PixelFormat srcFormat = c->srcFormat;
2671
2672 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2673 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
2674 &c->yuv2packedX);
2675
2676 c->chrToYV12 = NULL;
2677 switch(srcFormat) {
2678 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2679 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2680 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2681 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2682 case PIX_FMT_RGB8 :
2683 case PIX_FMT_BGR8 :
2684 case PIX_FMT_PAL8 :
2685 case PIX_FMT_BGR4_BYTE:
2686 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2687 case PIX_FMT_GBRP9LE:
2688 case PIX_FMT_GBRP10LE:
2689 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2690 case PIX_FMT_GBRP9BE:
2691 case PIX_FMT_GBRP10BE:
2692 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2693 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
2694 #if HAVE_BIGENDIAN
2695 case PIX_FMT_YUV444P9LE:
2696 case PIX_FMT_YUV422P9LE:
2697 case PIX_FMT_YUV420P9LE:
2698 case PIX_FMT_YUV422P10LE:
2699 case PIX_FMT_YUV444P10LE:
2700 case PIX_FMT_YUV420P10LE:
2701 case PIX_FMT_YUV420P16LE:
2702 case PIX_FMT_YUV422P16LE:
2703 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2704 #else
2705 case PIX_FMT_YUV444P9BE:
2706 case PIX_FMT_YUV422P9BE:
2707 case PIX_FMT_YUV420P9BE:
2708 case PIX_FMT_YUV444P10BE:
2709 case PIX_FMT_YUV422P10BE:
2710 case PIX_FMT_YUV420P10BE:
2711 case PIX_FMT_YUV420P16BE:
2712 case PIX_FMT_YUV422P16BE:
2713 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
2714 #endif
2715 }
2716 if (c->chrSrcHSubSample) {
2717 switch(srcFormat) {
2718 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2719 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2720 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2721 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2722 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2723 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2724 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2725 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2726 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2727 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2728 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2729 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2730 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2731 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2732 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2733 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2734 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2735 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2736 }
2737 } else {
2738 switch(srcFormat) {
2739 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2740 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2741 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2742 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2743 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2744 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2745 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2746 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2747 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2748 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2749 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2750 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2751 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2752 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2753 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2754 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2755 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2756 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2757 }
2758 }
2759
2760 c->lumToYV12 = NULL;
2761 c->alpToYV12 = NULL;
2762 switch (srcFormat) {
2763 case PIX_FMT_GBRP9LE:
2764 case PIX_FMT_GBRP10LE:
2765 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2766 case PIX_FMT_GBRP9BE:
2767 case PIX_FMT_GBRP10BE:
2768 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2769 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2770 #if HAVE_BIGENDIAN
2771 case PIX_FMT_YUV444P9LE:
2772 case PIX_FMT_YUV422P9LE:
2773 case PIX_FMT_YUV420P9LE:
2774 case PIX_FMT_YUV444P10LE:
2775 case PIX_FMT_YUV422P10LE:
2776 case PIX_FMT_YUV420P10LE:
2777 case PIX_FMT_YUV420P16LE:
2778 case PIX_FMT_YUV422P16LE:
2779 case PIX_FMT_YUV444P16LE:
2780 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2781 #else
2782 case PIX_FMT_YUV444P9BE: